From: Boyuan Yang Date: Sun, 28 Jul 2024 23:37:19 +0000 (-0400) Subject: New upstream version 1.1.8+ds1 X-Git-Tag: archive/raspbian/1.1.9+ds1-4+rpi1^2^2~15^2~1 X-Git-Url: https://dgit.raspbian.org/%22http:/www.example.com/cgi/%22https://%22%22/%22http:/www.example.com/cgi/%22https:/%22%22?a=commitdiff_plain;h=a0f739180cada9eba6d5e25449c49ae657dfc7e6;p=opencc.git New upstream version 1.1.8+ds1 --- diff --git a/.bazelversion b/.bazelversion new file mode 100644 index 0000000..b26a34e --- /dev/null +++ b/.bazelversion @@ -0,0 +1 @@ +7.2.1 diff --git a/.github/workflows/bazel.yml b/.github/workflows/bazel.yml new file mode 100644 index 0000000..f9ee858 --- /dev/null +++ b/.github/workflows/bazel.yml @@ -0,0 +1,20 @@ +name: Bazel + +on: + push: + branches: [master] + pull_request: + branches: [master] + +jobs: + build-and-test: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + + steps: + - uses: actions/checkout@v4 + - uses: bazelbuild/setup-bazelisk@v3 + - run: bazel build //:opencc + - run: bazel test --test_output=all //src/... //data/... //test/... diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 798150d..51a0935 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -1,23 +1,22 @@ -name: C/C++ CI +name: CMake on: push: - branches: [ master ] + branches: [master] pull_request: - branches: [ master ] + branches: [master] jobs: - build: - - runs-on: ubuntu-latest - + build-and-test: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest] steps: - - uses: actions/checkout@v2 - - name: Install doxygen - run: sudo apt-get install doxygen - - name: make build - run: make build VERBOSE=1 - - name: make test - run: make test VERBOSE=1 - - name: make benchmark - run: make benchmark VERBOSE=1 + - uses: actions/checkout@v4 + - name: make build + run: make build VERBOSE=1 REL_BUILD_DOCUMENTATION=OFF + - name: make test + run: make test VERBOSE=1 + - name: make benchmark + run: make benchmark VERBOSE=1 diff --git a/.github/workflows/mingw.yml b/.github/workflows/mingw.yml index 2038db2..dd99658 100644 --- a/.github/workflows/mingw.yml +++ b/.github/workflows/mingw.yml @@ -1,10 +1,10 @@ -name: mingw-w64 CI +name: mingw-w64 on: push: - branches: [ master ] + branches: [master] pull_request: - branches: [ master ] + branches: [master] jobs: build: @@ -13,20 +13,20 @@ jobs: run: shell: msys2 {0} steps: - - uses: msys2/setup-msys2@v2 - - uses: actions/checkout@v3 - - name: Install dependencies - run: pacman -S --noconfirm base-devel mingw-w64-x86_64-toolchain cmake ninja python - - name: Build with mingw-w64 - run: | - CC=/mingw64/bin/cc CXX=/mingw64/bin/c++ cmake \ - -B build -G Ninja \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=/usr \ - -DENABLE_GTEST=ON \ - -DBUILD_SHARED_LIBS=OFF - cmake --build build - - name: Run test - run: | - cd build - ctest + - uses: msys2/setup-msys2@v2 + - uses: actions/checkout@v4 + - name: Install dependencies + run: pacman -S --noconfirm base-devel mingw-w64-x86_64-toolchain cmake ninja python + - name: Build with mingw-w64 + run: | + CC=/mingw64/bin/cc CXX=/mingw64/bin/c++ cmake \ + -B build -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=/usr \ + -DENABLE_GTEST=ON \ + -DBUILD_SHARED_LIBS=OFF + cmake --build build + - name: Run test + run: | + cd build + ctest diff --git a/.github/workflows/msvc.yml b/.github/workflows/msvc.yml new file mode 100644 index 0000000..9daef8d --- /dev/null +++ b/.github/workflows/msvc.yml @@ -0,0 +1,26 @@ +name: MSVC + +on: + push: + branches: [master] + pull_request: + branches: [master] + +jobs: + build-and-test: + runs-on: windows-latest + strategy: + matrix: + arch: + - amd64 + - amd64_x86 + - amd64_arm64 + steps: + - uses: actions/checkout@v4 + - uses: ilammy/msvc-dev-cmd@v1 + with: + arch: ${{ matrix.arch }} + - name: build + run: ./build.cmd + - name: test + run: ./test.cmd diff --git a/.github/workflows/nodejs.yml b/.github/workflows/nodejs.yml index 8fe23ab..b785769 100644 --- a/.github/workflows/nodejs.yml +++ b/.github/workflows/nodejs.yml @@ -1,25 +1,25 @@ -name: Node.js CI +name: Node.js on: push: - branches: [ master ] + branches: [master] pull_request: - branches: [ master ] + branches: [master] jobs: - build: - - runs-on: ubuntu-latest + build-and-test: + runs-on: ${{ matrix.os }} strategy: matrix: - node-version: [18.x, 20.x] + node-version: [18.x, 20.x, 22.x, latest] + os: [ubuntu-latest, macos-latest] steps: - - uses: actions/checkout@v2 - - name: Use Node.js ${{ matrix.node-version }} - uses: actions/setup-node@v1 - with: - node-version: ${{ matrix.node-version }} - - run: npm ci - - run: npm test + - uses: actions/checkout@v4 + - name: Use Node.js ${{ matrix.node-version }} + uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node-version }} + - run: npm ci + - run: npm test diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index ccf6e78..7ace094 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -1,40 +1,39 @@ -name: Python CI +name: Python on: push: - branches: [ master ] + branches: [master] pull_request: - branches: [ master ] + branches: [master] jobs: unit-test: - runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8, 3.9, '3.10', '3.11', '3.12'] + python-version: [3.8, 3.9, "3.10", "3.11", "3.12"] steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install flake8 pytest wheel setuptools - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --exclude deps --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --exclude deps --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Build and install - run: python setup.py build_ext install - - name: Test with pytest - run: cd python && pytest + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 pytest wheel setuptools + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --exclude deps --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --exclude deps --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Build and install + run: python -m pip install . + - name: Test with pytest + run: pytest python/ test-pypi: strategy: @@ -43,28 +42,28 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 - - name: Build package and upload from docker (Linux) - if: runner.os == 'Linux' - run: | - docker run --rm -v "${PWD}:/opt/OpenCC" \ - -e TWINE_USERNAME=__token__ \ - -e TWINE_PASSWORD=${{ secrets.PYPI_TOKEN }} \ - ubuntu:22.04 /bin/bash /opt/OpenCC/release-pypi-linux.sh testonly + - name: Build package and upload from docker (Linux) + if: runner.os == 'Linux' + run: | + docker run --rm -v "${PWD}:/opt/OpenCC" \ + -e TWINE_USERNAME=__token__ \ + -e TWINE_PASSWORD=${{ secrets.PYPI_TOKEN }} \ + ubuntu:22.04 /bin/bash /opt/OpenCC/release-pypi-linux.sh testonly - - name: Build package and upload (macOS) - if: runner.os == 'macOS' - run: bash release-pypi-macos.sh testonly - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + - name: Build package and upload (macOS) + if: runner.os == 'macOS' + run: bash release-pypi-macos.sh testonly + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} - - name: Build package and upload (Windows) - if: runner.os == 'Windows' - run: | - C:\Miniconda/condabin/conda.bat init powershell - ./release-pypi-windows.cmd testonly - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + - name: Build package and upload (Windows) + if: runner.os == 'Windows' + run: | + C:\Miniconda/condabin/conda.bat init powershell + ./release-pypi-windows.cmd testonly + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} diff --git a/.gitignore b/.gitignore index 66d9a8b..f516487 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ .cproject /.vscode /.mypy_cache +/bazel-* /build /other /doc/html @@ -15,3 +16,4 @@ /xcode /node_modules /*.egg-info +/.venv/ diff --git a/.npmignore b/.npmignore index 50bf6a1..8a56f3b 100644 --- a/.npmignore +++ b/.npmignore @@ -5,12 +5,14 @@ CMakeLists.txt *.pyc *.cmd *.tgz +*.bazel /.github /.vscode /.appveyor.yml /.clang-format /.travis.yml +/bazel-* /Makefile /src/*Test.cpp /src/*TestBase.cpp @@ -20,6 +22,7 @@ CMakeLists.txt /deps/gtest* /deps/pybind* /deps/tclap* +/bazel-* /build /debug /dist diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index b3f4be6..0000000 --- a/.travis.yml +++ /dev/null @@ -1,44 +0,0 @@ -language: node_js - -cache: - npm: true - ccache: true - -node_js: - - stable - - 20 - - 18 - -os: - - linux - - osx - -arch: - - amd64 - - arm64 - -addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - doxygen - -# Install scripts. (runs after repo cloning) -install: - # install modules - - npm install --build-from-source - -# Post-install test scripts. -script: - - export PATH="/usr/lib/ccache/:$PATH" - - if [ $TRAVIS_NODE_VERSION == "stable" ]; then - make test VERBOSE=1; - make package VERBOSE=1; - make benchmark VERBOSE=1; - fi - - - npm test - -after_success: - - npm run deploy diff --git a/BUILD.bazel b/BUILD.bazel new file mode 100644 index 0000000..6c61b49 --- /dev/null +++ b/BUILD.bazel @@ -0,0 +1,19 @@ +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "opencc", + hdrs = [ + "//src:Export.hpp", + "//src:SimpleConverter.hpp", + "//src:opencc.h", + ], + data = [ + "//data/config", + "//data/dictionary:binary_dictionaries", + "//data/dictionary:text_dictionaries", + ], + strip_include_prefix = "src", + deps = [ + "//src:opencc", + ], +) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1acb75a..3393613 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,11 +38,11 @@ option(USE_SYSTEM_RAPIDJSON "Use system version of RapidJSON" OFF) option(USE_SYSTEM_TCLAP "Use system version of TCLAP" OFF) ######## Package information -set (PACKAGE_URL https://github.com/BYVoid/Opencc) -set (PACKAGE_BUGREPORT https://github.com/BYVoid/Opencc/issues) +set (PACKAGE_URL https://github.com/BYVoid/OpenCC) +set (PACKAGE_BUGREPORT https://github.com/BYVoid/OpenCC/issues) set (OPENCC_VERSION_MAJOR 1) set (OPENCC_VERSION_MINOR 1) -set (OPENCC_VERSION_REVISION 7) +set (OPENCC_VERSION_REVISION 8) if (CMAKE_BUILD_TYPE MATCHES Debug) set (version_suffix .Debug) @@ -57,7 +57,7 @@ set(CPACK_SOURCE_PACKAGE_FILE_NAME "${PACKAGE_NAME}-${OPENCC_VERSION_MAJOR}.${OPENCC_VERSION_MINOR}.${OPENCC_VERSION_REVISION}" ) set(CPACK_SOURCE_IGNORE_FILES - "/build/;/test/dict.ocd;/test/dict.txt;/test/dict.bin;/other/;/opencc.xcodeproj/;/.git/;.gitignore;~$;.pyc;${CPACK_SOURCE_IGNORE_FILES}" + "/build/;/test/dict.ocd;/test/dict.txt;/test/dict.bin;/other/;/opencc.xcodeproj/;/.git/;.gitignore;~$;.pyc;/bazel*;/node_modules;/.github;/.pytest_cache;/.vscode;${CPACK_SOURCE_IGNORE_FILES}" ) include(CPack) @@ -79,10 +79,10 @@ endif() ######## Directory set (DIR_PREFIX ${CMAKE_INSTALL_PREFIX}) -set (DIR_INCLUDE ${DIR_PREFIX}/include/) -set (DIR_SHARE ${DIR_PREFIX}/share/) -set (DIR_ETC ${DIR_PREFIX}/etc/) -set (DIR_LIBRARY ${DIR_PREFIX}/lib${LIB_SUFFIX}/) +set (DIR_INCLUDE ${DIR_PREFIX}/include) +set (DIR_SHARE ${DIR_PREFIX}/share) +set (DIR_ETC ${DIR_PREFIX}/etc) +set (DIR_LIBRARY ${DIR_PREFIX}/lib${LIB_SUFFIX}) if (DEFINED SHARE_INSTALL_PREFIX) set (DIR_SHARE ${SHARE_INSTALL_PREFIX}) @@ -100,8 +100,8 @@ if (DEFINED LIB_INSTALL_DIR) set (DIR_LIBRARY ${LIB_INSTALL_DIR}) endif (DEFINED LIB_INSTALL_DIR) -set (DIR_SHARE_OPENCC ${DIR_SHARE}/opencc/) -set (DIR_SHARE_LOCALE ${DIR_SHARE}/locale/) +set (DIR_SHARE_OPENCC ${DIR_SHARE}/opencc) +set (DIR_SHARE_LOCALE ${DIR_SHARE}/locale) ######## Configuration @@ -229,7 +229,7 @@ add_subdirectory(test) if (ENABLE_GTEST) if(NOT USE_SYSTEM_GTEST) - add_subdirectory(deps/gtest-1.12.1) + add_subdirectory(deps/googletest-1.15.0) endif() enable_testing() endif() @@ -248,7 +248,7 @@ if (BUILD_PYTHON) if(USE_SYSTEM_PYBIND11) find_package(pybind11 CONFIG) else() - add_subdirectory(deps/pybind11-2.10.0) + add_subdirectory(deps/pybind11-2.13.1) endif() pybind11_add_module(opencc_clib src/py_opencc.cpp) target_link_libraries(opencc_clib PRIVATE libopencc) diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..38ab0ad --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,9 @@ +graft src +graft deps +graft test +graft data +graft doc +include CMakeLists.txt OpenCCConfig.cmake.in opencc.pc.in README* LICENSE* +global-exclude *~ *.py[cod] *.so +include python/**/__init__.py +graft python/tests diff --git a/MODULE.bazel b/MODULE.bazel new file mode 100644 index 0000000..8dd6621 --- /dev/null +++ b/MODULE.bazel @@ -0,0 +1,15 @@ +"Open Chinese Convert" + +module( + name = "opencc", + version = "1.1.8", + compatibility_level = 1, +) + +bazel_dep(name = "darts-clone", version = "0.32") +bazel_dep(name = "googletest", version = "1.15.0") +bazel_dep(name = "marisa-trie", version = "0.2.6") +bazel_dep(name = "rapidjson", version = "1.1.0") +bazel_dep(name = "rules_cc", version = "0.0.9") +bazel_dep(name = "rules_python", version = "0.34.0") +bazel_dep(name = "tclap", version = "1.2.5") diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock new file mode 100644 index 0000000..cbb5f6a --- /dev/null +++ b/MODULE.bazel.lock @@ -0,0 +1,157 @@ +{ + "lockFileVersion": 11, + "registryFileHashes": { + "https://bcr.bazel.build/bazel_registry.json": "8a28e4aff06ee60aed2a8c281907fb8bcbf3b753c91fb5a5c57da3215d5b3497", + "https://bcr.bazel.build/modules/abseil-cpp/20210324.2/MODULE.bazel": "7cd0312e064fde87c8d1cd79ba06c876bd23630c83466e9500321be55c96ace2", + "https://bcr.bazel.build/modules/abseil-cpp/20211102.0/MODULE.bazel": "70390338f7a5106231d20620712f7cccb659cd0e9d073d1991c038eb9fc57589", + "https://bcr.bazel.build/modules/abseil-cpp/20230125.1/MODULE.bazel": "89047429cb0207707b2dface14ba7f8df85273d484c2572755be4bab7ce9c3a0", + "https://bcr.bazel.build/modules/abseil-cpp/20230802.0.bcr.1/MODULE.bazel": "1c8cec495288dccd14fdae6e3f95f772c1c91857047a098fad772034264cc8cb", + "https://bcr.bazel.build/modules/abseil-cpp/20230802.0/MODULE.bazel": "d253ae36a8bd9ee3c5955384096ccb6baf16a1b1e93e858370da0a3b94f77c16", + "https://bcr.bazel.build/modules/abseil-cpp/20240116.2/MODULE.bazel": "73939767a4686cd9a520d16af5ab440071ed75cec1a876bf2fcfaf1f71987a16", + "https://bcr.bazel.build/modules/abseil-cpp/20240116.2/source.json": "750d5e29326fb59cbe61116a7b803c8a1d0a7090a9c8ed89888d188e3c473fc7", + "https://bcr.bazel.build/modules/apple_support/1.15.1/MODULE.bazel": "a0556fefca0b1bb2de8567b8827518f94db6a6e7e7d632b4c48dc5f865bc7c85", + "https://bcr.bazel.build/modules/apple_support/1.15.1/source.json": "517f2b77430084c541bc9be2db63fdcbb7102938c5f64c17ee60ffda2e5cf07b", + "https://bcr.bazel.build/modules/apple_support/1.5.0/MODULE.bazel": "50341a62efbc483e8a2a6aec30994a58749bd7b885e18dd96aa8c33031e558ef", + "https://bcr.bazel.build/modules/bazel_features/1.1.1/MODULE.bazel": "27b8c79ef57efe08efccbd9dd6ef70d61b4798320b8d3c134fd571f78963dbcd", + "https://bcr.bazel.build/modules/bazel_features/1.11.0/MODULE.bazel": "f9382337dd5a474c3b7d334c2f83e50b6eaedc284253334cf823044a26de03e8", + "https://bcr.bazel.build/modules/bazel_features/1.11.0/source.json": "c9320aa53cd1c441d24bd6b716da087ad7e4ff0d9742a9884587596edfe53015", + "https://bcr.bazel.build/modules/bazel_features/1.9.1/MODULE.bazel": "8f679097876a9b609ad1f60249c49d68bfab783dd9be012faf9d82547b14815a", + "https://bcr.bazel.build/modules/bazel_skylib/1.0.3/MODULE.bazel": "bcb0fd896384802d1ad283b4e4eb4d718eebd8cb820b0a2c3a347fb971afd9d8", + "https://bcr.bazel.build/modules/bazel_skylib/1.1.1/MODULE.bazel": "1add3e7d93ff2e6998f9e118022c84d163917d912f5afafb3058e3d2f1545b5e", + "https://bcr.bazel.build/modules/bazel_skylib/1.2.0/MODULE.bazel": "44fe84260e454ed94ad326352a698422dbe372b21a1ac9f3eab76eb531223686", + "https://bcr.bazel.build/modules/bazel_skylib/1.2.1/MODULE.bazel": "f35baf9da0efe45fa3da1696ae906eea3d615ad41e2e3def4aeb4e8bc0ef9a7a", + "https://bcr.bazel.build/modules/bazel_skylib/1.3.0/MODULE.bazel": "20228b92868bf5cfc41bda7afc8a8ba2a543201851de39d990ec957b513579c5", + "https://bcr.bazel.build/modules/bazel_skylib/1.4.1/MODULE.bazel": "a0dcb779424be33100dcae821e9e27e4f2901d9dfd5333efe5ac6a8d7ab75e1d", + "https://bcr.bazel.build/modules/bazel_skylib/1.5.0/MODULE.bazel": "32880f5e2945ce6a03d1fbd588e9198c0a959bb42297b2cfaf1685b7bc32e138", + "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/MODULE.bazel": "8fdee2dbaace6c252131c00e1de4b165dc65af02ea278476187765e1a617b917", + "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/source.json": "082ed5f9837901fada8c68c2f3ddc958bb22b6d654f71dd73f3df30d45d4b749", + "https://bcr.bazel.build/modules/buildozer/7.1.2/MODULE.bazel": "2e8dd40ede9c454042645fd8d8d0cd1527966aa5c919de86661e62953cd73d84", + "https://bcr.bazel.build/modules/buildozer/7.1.2/source.json": "c9028a501d2db85793a6996205c8de120944f50a0d570438fcae0457a5f9d1f8", + "https://bcr.bazel.build/modules/darts-clone/0.32/MODULE.bazel": "bdd235e31dd7f2538ff8b3ab3ef09c831349b141afca587d32b487d75c502361", + "https://bcr.bazel.build/modules/darts-clone/0.32/source.json": "c65158c152e276f3c59dc0fc0fa746f1ff601e23b0a09812e024fe563e4dc99c", + "https://bcr.bazel.build/modules/google_benchmark/1.8.2/MODULE.bazel": "a70cf1bba851000ba93b58ae2f6d76490a9feb74192e57ab8e8ff13c34ec50cb", + "https://bcr.bazel.build/modules/googletest/1.11.0/MODULE.bazel": "3a83f095183f66345ca86aa13c58b59f9f94a2f81999c093d4eeaa2d262d12f4", + "https://bcr.bazel.build/modules/googletest/1.14.0.bcr.1/MODULE.bazel": "22c31a561553727960057361aa33bf20fb2e98584bc4fec007906e27053f80c6", + "https://bcr.bazel.build/modules/googletest/1.14.0/MODULE.bazel": "cfbcbf3e6eac06ef9d85900f64424708cc08687d1b527f0ef65aa7517af8118f", + "https://bcr.bazel.build/modules/googletest/1.15.0/MODULE.bazel": "c4515ecca65378b9035bb6ccee496c1a362b31311c2380ca7740a73bfdaccb51", + "https://bcr.bazel.build/modules/googletest/1.15.0/source.json": "c235880d343a5758da581c839653abeebb5f5cd9d987ff879ca68bf08a59f879", + "https://bcr.bazel.build/modules/libpfm/4.11.0/MODULE.bazel": "45061ff025b301940f1e30d2c16bea596c25b176c8b6b3087e92615adbd52902", + "https://bcr.bazel.build/modules/marisa-trie/0.2.6/MODULE.bazel": "3a4e187ae58831081fe6b38d3f58f44e9d929164b2c1bc970821f076a023dcb6", + "https://bcr.bazel.build/modules/marisa-trie/0.2.6/source.json": "a9670e7b0889be633edb31e9aa4ffffa6a562ead1c576cf8ff17f474e54d2c59", + "https://bcr.bazel.build/modules/platforms/0.0.10/MODULE.bazel": "8cb8efaf200bdeb2150d93e162c40f388529a25852b332cec879373771e48ed5", + "https://bcr.bazel.build/modules/platforms/0.0.10/source.json": "f22828ff4cf021a6b577f1bf6341cb9dcd7965092a439f64fc1bb3b7a5ae4bd5", + "https://bcr.bazel.build/modules/platforms/0.0.4/MODULE.bazel": "9b328e31ee156f53f3c416a64f8491f7eb731742655a47c9eec4703a71644aee", + "https://bcr.bazel.build/modules/platforms/0.0.5/MODULE.bazel": "5733b54ea419d5eaf7997054bb55f6a1d0b5ff8aedf0176fef9eea44f3acda37", + "https://bcr.bazel.build/modules/platforms/0.0.6/MODULE.bazel": "ad6eeef431dc52aefd2d77ed20a4b353f8ebf0f4ecdd26a807d2da5aa8cd0615", + "https://bcr.bazel.build/modules/platforms/0.0.7/MODULE.bazel": "72fd4a0ede9ee5c021f6a8dd92b503e089f46c227ba2813ff183b71616034814", + "https://bcr.bazel.build/modules/platforms/0.0.8/MODULE.bazel": "9f142c03e348f6d263719f5074b21ef3adf0b139ee4c5133e2aa35664da9eb2d", + "https://bcr.bazel.build/modules/platforms/0.0.9/MODULE.bazel": "4a87a60c927b56ddd67db50c89acaa62f4ce2a1d2149ccb63ffd871d5ce29ebc", + "https://bcr.bazel.build/modules/protobuf/21.7/MODULE.bazel": "a5a29bb89544f9b97edce05642fac225a808b5b7be74038ea3640fae2f8e66a7", + "https://bcr.bazel.build/modules/protobuf/23.1/MODULE.bazel": "88b393b3eb4101d18129e5db51847cd40a5517a53e81216144a8c32dfeeca52a", + "https://bcr.bazel.build/modules/protobuf/24.4/MODULE.bazel": "7bc7ce5f2abf36b3b7b7c8218d3acdebb9426aeb35c2257c96445756f970eb12", + "https://bcr.bazel.build/modules/protobuf/24.4/source.json": "ace4b8c65d4cfe64efe544f09fc5e5df77faf3a67fbb29c5341e0d755d9b15d6", + "https://bcr.bazel.build/modules/protobuf/3.19.0/MODULE.bazel": "6b5fbb433f760a99a22b18b6850ed5784ef0e9928a72668b66e4d7ccd47db9b0", + "https://bcr.bazel.build/modules/protobuf/3.19.6/MODULE.bazel": "9233edc5e1f2ee276a60de3eaa47ac4132302ef9643238f23128fea53ea12858", + "https://bcr.bazel.build/modules/pybind11_bazel/2.11.1/MODULE.bazel": "88af1c246226d87e65be78ed49ecd1e6f5e98648558c14ce99176da041dc378e", + "https://bcr.bazel.build/modules/pybind11_bazel/2.12.0/MODULE.bazel": "e6f4c20442eaa7c90d7190d8dc539d0ab422f95c65a57cc59562170c58ae3d34", + "https://bcr.bazel.build/modules/pybind11_bazel/2.12.0/source.json": "6900fdc8a9e95866b8c0d4ad4aba4d4236317b5c1cd04c502df3f0d33afed680", + "https://bcr.bazel.build/modules/rapidjson/1.1.0/MODULE.bazel": "0367b53ebffe290358729893e7c435da379397738e09ae45c845e1e4f59fa3fc", + "https://bcr.bazel.build/modules/rapidjson/1.1.0/source.json": "0e1c31420d28513742394cd6ab5c4ed004e097670fc85fcf111cdcab96f381bb", + "https://bcr.bazel.build/modules/re2/2023-09-01/MODULE.bazel": "cb3d511531b16cfc78a225a9e2136007a48cf8a677e4264baeab57fe78a80206", + "https://bcr.bazel.build/modules/re2/2024-07-02/MODULE.bazel": "0eadc4395959969297cbcf31a249ff457f2f1d456228c67719480205aa306daa", + "https://bcr.bazel.build/modules/re2/2024-07-02/source.json": "547d0111a9d4f362db32196fef805abbf3676e8d6afbe44d395d87816c1130ca", + "https://bcr.bazel.build/modules/rules_cc/0.0.1/MODULE.bazel": "cb2aa0747f84c6c3a78dad4e2049c154f08ab9d166b1273835a8174940365647", + "https://bcr.bazel.build/modules/rules_cc/0.0.2/MODULE.bazel": "6915987c90970493ab97393024c156ea8fb9f3bea953b2f3ec05c34f19b5695c", + "https://bcr.bazel.build/modules/rules_cc/0.0.5/MODULE.bazel": "be41f87587998fe8890cd82ea4e848ed8eb799e053c224f78f3ff7fe1a1d9b74", + "https://bcr.bazel.build/modules/rules_cc/0.0.6/MODULE.bazel": "abf360251023dfe3efcef65ab9d56beefa8394d4176dd29529750e1c57eaa33f", + "https://bcr.bazel.build/modules/rules_cc/0.0.8/MODULE.bazel": "964c85c82cfeb6f3855e6a07054fdb159aced38e99a5eecf7bce9d53990afa3e", + "https://bcr.bazel.build/modules/rules_cc/0.0.9/MODULE.bazel": "836e76439f354b89afe6a911a7adf59a6b2518fafb174483ad78a2a2fde7b1c5", + "https://bcr.bazel.build/modules/rules_cc/0.0.9/source.json": "1f1ba6fea244b616de4a554a0f4983c91a9301640c8fe0dd1d410254115c8430", + "https://bcr.bazel.build/modules/rules_foreign_cc/0.9.0/MODULE.bazel": "c9e8c682bf75b0e7c704166d79b599f93b72cfca5ad7477df596947891feeef6", + "https://bcr.bazel.build/modules/rules_java/4.0.0/MODULE.bazel": "5a78a7ae82cd1a33cef56dc578c7d2a46ed0dca12643ee45edbb8417899e6f74", + "https://bcr.bazel.build/modules/rules_java/7.1.0/MODULE.bazel": "30d9135a2b6561c761bd67bd4990da591e6bdc128790ce3e7afd6a3558b2fb64", + "https://bcr.bazel.build/modules/rules_java/7.6.1/MODULE.bazel": "2f14b7e8a1aa2f67ae92bc69d1ec0fa8d9f827c4e17ff5e5f02e91caa3b2d0fe", + "https://bcr.bazel.build/modules/rules_java/7.6.1/source.json": "8f3f3076554e1558e8e468b2232991c510ecbcbed9e6f8c06ac31c93bcf38362", + "https://bcr.bazel.build/modules/rules_jvm_external/4.4.2/MODULE.bazel": "a56b85e418c83eb1839819f0b515c431010160383306d13ec21959ac412d2fe7", + "https://bcr.bazel.build/modules/rules_jvm_external/5.1/MODULE.bazel": "33f6f999e03183f7d088c9be518a63467dfd0be94a11d0055fe2d210f89aa909", + "https://bcr.bazel.build/modules/rules_jvm_external/5.1/source.json": "5abb45cc9beb27b77aec6a65a11855ef2b55d95dfdc358e9f312b78ae0ba32d5", + "https://bcr.bazel.build/modules/rules_license/0.0.3/MODULE.bazel": "627e9ab0247f7d1e05736b59dbb1b6871373de5ad31c3011880b4133cafd4bd0", + "https://bcr.bazel.build/modules/rules_license/0.0.7/MODULE.bazel": "088fbeb0b6a419005b89cf93fe62d9517c0a2b8bb56af3244af65ecfe37e7d5d", + "https://bcr.bazel.build/modules/rules_license/0.0.7/source.json": "355cc5737a0f294e560d52b1b7a6492d4fff2caf0bef1a315df5a298fca2d34a", + "https://bcr.bazel.build/modules/rules_pkg/0.7.0/MODULE.bazel": "df99f03fc7934a4737122518bb87e667e62d780b610910f0447665a7e2be62dc", + "https://bcr.bazel.build/modules/rules_pkg/0.7.0/source.json": "c2557066e0c0342223ba592510ad3d812d4963b9024831f7f66fd0584dd8c66c", + "https://bcr.bazel.build/modules/rules_proto/4.0.0/MODULE.bazel": "a7a7b6ce9bee418c1a760b3d84f83a299ad6952f9903c67f19e4edd964894e06", + "https://bcr.bazel.build/modules/rules_proto/5.3.0-21.7/MODULE.bazel": "e8dff86b0971688790ae75528fe1813f71809b5afd57facb44dad9e8eca631b7", + "https://bcr.bazel.build/modules/rules_proto/6.0.0-rc1/MODULE.bazel": "1e5b502e2e1a9e825eef74476a5a1ee524a92297085015a052510b09a1a09483", + "https://bcr.bazel.build/modules/rules_proto/6.0.0-rc1/source.json": "8d8448e71706df7450ced227ca6b3812407ff5e2ccad74a43a9fbe79c84e34e0", + "https://bcr.bazel.build/modules/rules_python/0.10.2/MODULE.bazel": "cc82bc96f2997baa545ab3ce73f196d040ffb8756fd2d66125a530031cd90e5f", + "https://bcr.bazel.build/modules/rules_python/0.22.1/MODULE.bazel": "26114f0c0b5e93018c0c066d6673f1a2c3737c7e90af95eff30cfee38d0bbac7", + "https://bcr.bazel.build/modules/rules_python/0.25.0/MODULE.bazel": "72f1506841c920a1afec76975b35312410eea3aa7b63267436bfb1dd91d2d382", + "https://bcr.bazel.build/modules/rules_python/0.29.0/MODULE.bazel": "2ac8cd70524b4b9ec49a0b8284c79e4cd86199296f82f6e0d5da3f783d660c82", + "https://bcr.bazel.build/modules/rules_python/0.31.0/MODULE.bazel": "93a43dc47ee570e6ec9f5779b2e64c1476a6ce921c48cc9a1678a91dd5f8fd58", + "https://bcr.bazel.build/modules/rules_python/0.33.2/MODULE.bazel": "3e036c4ad8d804a4dad897d333d8dce200d943df4827cb849840055be8d2e937", + "https://bcr.bazel.build/modules/rules_python/0.34.0/MODULE.bazel": "1d623d026e075b78c9fde483a889cda7996f5da4f36dffb24c246ab30f06513a", + "https://bcr.bazel.build/modules/rules_python/0.34.0/source.json": "113116e287eec64a7d005a9db44865d810499fdc4f621e352aff58214f5ea2d8", + "https://bcr.bazel.build/modules/rules_python/0.4.0/MODULE.bazel": "9208ee05fd48bf09ac60ed269791cf17fb343db56c8226a720fbb1cdf467166c", + "https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8", + "https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c", + "https://bcr.bazel.build/modules/stardoc/0.5.3/source.json": "cd53fe968dc8cd98197c052db3db6d82562960c87b61e7a90ee96f8e4e0dda97", + "https://bcr.bazel.build/modules/tclap/1.2.5/MODULE.bazel": "d91b779402516ce378283a867e5af24bcc37a8cf80934bf7f9679d082eaded53", + "https://bcr.bazel.build/modules/tclap/1.2.5/source.json": "8e519d780d8bb314bbe87af7aa50f0ba7fe68e2450e6df97f860ed105aecd41e", + "https://bcr.bazel.build/modules/upb/0.0.0-20220923-a547704/MODULE.bazel": "7298990c00040a0e2f121f6c32544bab27d4452f80d9ce51349b1a28f3005c43", + "https://bcr.bazel.build/modules/upb/0.0.0-20230516-61a97ef/MODULE.bazel": "c0df5e35ad55e264160417fd0875932ee3c9dda63d9fccace35ac62f45e1b6f9", + "https://bcr.bazel.build/modules/upb/0.0.0-20230516-61a97ef/source.json": "b2150404947339e8b947c6b16baa39fa75657f4ddec5e37272c7b11c7ab533bc", + "https://bcr.bazel.build/modules/zlib/1.2.11/MODULE.bazel": "07b389abc85fdbca459b69e2ec656ae5622873af3f845e1c9d80fe179f3effa0", + "https://bcr.bazel.build/modules/zlib/1.2.12/MODULE.bazel": "3b1a8834ada2a883674be8cbd36ede1b6ec481477ada359cd2d3ddc562340b27", + "https://bcr.bazel.build/modules/zlib/1.3/MODULE.bazel": "6a9c02f19a24dcedb05572b2381446e27c272cd383aed11d41d99da9e3167a72", + "https://bcr.bazel.build/modules/zlib/1.3/source.json": "b6b43d0737af846022636e6e255fd4a96fee0d34f08f3830e6e0bac51465c37c" + }, + "selectedYankedVersions": {}, + "moduleExtensions": { + "@@apple_support~//crosstool:setup.bzl%apple_cc_configure_extension": { + "general": { + "bzlTransitiveDigest": "ltCGFbl/LQQZXn/LEMXfKX7pGwyqNiOCHcmiQW0tmjM=", + "usagesDigest": "RkqDb8JtSSm4rLheCLMw/Dx3QQE7dZbl4taOVEYaQZg=", + "recordedFileInputs": {}, + "recordedDirentsInputs": {}, + "envVariables": {}, + "generatedRepoSpecs": { + "local_config_apple_cc": { + "bzlFile": "@@apple_support~//crosstool:setup.bzl", + "ruleClassName": "_apple_cc_autoconf", + "attributes": {} + }, + "local_config_apple_cc_toolchains": { + "bzlFile": "@@apple_support~//crosstool:setup.bzl", + "ruleClassName": "_apple_cc_autoconf_toolchains", + "attributes": {} + } + }, + "recordedRepoMappingEntries": [ + [ + "apple_support~", + "bazel_tools", + "bazel_tools" + ] + ] + } + }, + "@@platforms//host:extension.bzl%host_platform": { + "general": { + "bzlTransitiveDigest": "xelQcPZH8+tmuOHVjL9vDxMnnQNMlwj0SlvgoqBkm4U=", + "usagesDigest": "V1R2Y2oMxKNfx2WCWpSCaUV1WefW1o8HZGm3v1vHgY4=", + "recordedFileInputs": {}, + "recordedDirentsInputs": {}, + "envVariables": {}, + "generatedRepoSpecs": { + "host_platform": { + "bzlFile": "@@platforms//host:extension.bzl", + "ruleClassName": "host_platform_repo", + "attributes": {} + } + }, + "recordedRepoMappingEntries": [] + } + } + } +} diff --git a/Makefile b/Makefile index ff8bc89..db5af82 100644 --- a/Makefile +++ b/Makefile @@ -17,13 +17,14 @@ # PREFIX = /usr +REL_BUILD_DOCUMENTATION ?= ON -.PHONY: build clean node test xcode-build +.PHONY: bazel build clean node test xcode-build build: mkdir -p build/rel (cd build/rel; cmake \ - -DBUILD_DOCUMENTATION:BOOL=ON \ + -DBUILD_DOCUMENTATION:BOOL=${REL_BUILD_DOCUMENTATION} \ -DENABLE_GTEST:BOOL=OFF \ -DENABLE_BENCHMARK:BOOL=OFF \ -DCMAKE_BUILD_TYPE=Release \ @@ -78,13 +79,13 @@ xcode-build: xcodebuild build) python-build: - python setup.py build_ext + echo "No need to build" python-install: python-build - python setup.py install + python -m pip install . python-dist: python-build - python setup.py bdist_wheel + python -m build python-test: python-build cd python; pytest . @@ -97,7 +98,14 @@ format: | xargs clang-format -i clean: - rm -rf build xcode python/opencc/clib *.egg-info + rm -rf build xcode python/opencc/clib *.egg-info bazel-* install: build make -C build/rel install VERBOSE=${VERBOSE} PREFIX=${PREFIX} + +bazel: + bazel build //:opencc + bazel test --test_output=all //src/... //data/... //test/... + +bazel-clean: + bazel clean --expunge diff --git a/NEWS.md b/NEWS.md index 7df139e..0c2767f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,22 @@ # Change History of OpenCC +## Version 1.1.8 + +2024年7月27日 + +* 修正Node新版本編譯的問題([#782](https://github.com/BYVoid/OpenCC/issues/782), [#798](https://github.com/BYVoid/OpenCC/issues/798))。 +* 進一步修正Python包生成腳本([#875](https://github.com/BYVoid/OpenCC/pull/875))。 +* 引入Bazel構建系統以及CI([#879](https://github.com/BYVoid/OpenCC/pull/879))。 +* 引入Github MSVC CI([#880](https://github.com/BYVoid/OpenCC/pull/880))。 +* 爲`opencc`命令行工具添加了字典和配置的路徑`--path`參數。 +* 更新附帶的`googletest`版本到1.15,`pybind11`到2.13.1,`tclap`到1.2.5。 +* 若干轉換字詞修正([#609](https://github.com/BYVoid/OpenCC/pull/609), [#698](https://github.com/BYVoid/OpenCC/pull/698), [#707](https://github.com/BYVoid/OpenCC/pull/707), [#760](https://github.com/BYVoid/OpenCC/pull/760), [#779](https://github.com/BYVoid/OpenCC/pull/779), [#786](https://github.com/BYVoid/OpenCC/pull/786), [#792](https://github.com/BYVoid/OpenCC/pull/792), [#806](https://github.com/BYVoid/OpenCC/pull/806), [#808](https://github.com/BYVoid/OpenCC/pull/808), [#810](https://github.com/BYVoid/OpenCC/pull/810), [#825](https://github.com/BYVoid/OpenCC/pull/825), [#826](https://github.com/BYVoid/OpenCC/pull/826), [#837](https://github.com/BYVoid/OpenCC/pull/837), [#864](https://github.com/BYVoid/OpenCC/pull/864), [#865](https://github.com/BYVoid/OpenCC/pull/865), [#870](https://github.com/BYVoid/OpenCC/pull/870), [#877](https://github.com/BYVoid/OpenCC/pull/877), [#878](https://github.com/BYVoid/OpenCC/pull/878))。 + ## Version 1.1.7 2023年10月15日 -* 添加提交时 python 包重建以验证包生成 ([#822](https://github.com/BYVoid/OpenCC/pull/822))。 +* 添加提交時 python 包重建以驗證包生成 ([#822](https://github.com/BYVoid/OpenCC/pull/822))。 * 支持Python 3.12 和 Node 20,移除針對Python 3.7和Node 16的構建 ([#820](https://github.com/BYVoid/OpenCC/pull/820))。 * add mingw-w64 ci ([#802](https://github.com/BYVoid/OpenCC/pull/802))。 * Add support of CMake config modules ([#763](https://github.com/BYVoid/OpenCC/pull/763))。 @@ -14,8 +26,8 @@ 2022年12月08日 -* 修复python3.11 macos构建 ([#744](https://github.com/BYVoid/OpenCC/pull/744))。 -* Bump gtest 和 benchmark 以与最新的 github runners 一起工作 ([#747](https://github.com/BYVoid/OpenCC/pull/747))。 +* 修復python3.11 macos構建 ([#744](https://github.com/BYVoid/OpenCC/pull/744))。 +* Bump gtest 和 benchmark 以與最新的 github runners 一起工作 ([#747](https://github.com/BYVoid/OpenCC/pull/747))。 ## Version 1.1.5 diff --git a/README.md b/README.md index dc1f87b..e4edffb 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,11 @@ # Open Chinese Convert 開放中文轉換 -[![Travis](https://img.shields.io/travis/BYVoid/OpenCC.svg)](https://travis-ci.org/BYVoid/OpenCC) -[![AppVeyor](https://img.shields.io/appveyor/ci/Carbo/OpenCC.svg)](https://ci.appveyor.com/project/Carbo/OpenCC) -[![C/C++ CI](https://github.com/BYVoid/OpenCC/actions/workflows/cmake.yml/badge.svg)](https://github.com/BYVoid/OpenCC/actions/workflows/cmake.yml) +[![CMake](https://github.com/BYVoid/OpenCC/actions/workflows/cmake.yml/badge.svg)](https://github.com/BYVoid/OpenCC/actions/workflows/cmake.yml) +[![Bazel](https://github.com/BYVoid/OpenCC/actions/workflows/bazel.yml/badge.svg)](https://github.com/BYVoid/OpenCC/actions/workflows/bazel.yml) +[![MSVC](https://github.com/BYVoid/OpenCC/actions/workflows/msvc.yml/badge.svg)](https://github.com/BYVoid/OpenCC/actions/workflows/msvc.yml) [![Node.js CI](https://github.com/BYVoid/OpenCC/actions/workflows/nodejs.yml/badge.svg)](https://github.com/BYVoid/OpenCC/actions/workflows/nodejs.yml) [![Python CI](https://github.com/BYVoid/OpenCC/actions/workflows/python.yml/badge.svg)](https://github.com/BYVoid/OpenCC/actions/workflows/python.yml) +[![AppVeyor](https://img.shields.io/appveyor/ci/Carbo/OpenCC.svg)](https://ci.appveyor.com/project/Carbo/OpenCC) ## Introduction 介紹 @@ -118,6 +119,7 @@ Document 文檔: https://byvoid.github.io/OpenCC/ * WebAssembly: [wasm-opencc](https://github.com/oyyd/wasm-opencc) * Browser Extension: [opencc-extension](https://github.com/tnychn/opencc-extension) * Go (Pure): [OpenCC for Go](https://github.com/longbridgeapp/opencc) +* Dart (native-assets): [opencc-dart](https://github.com/lindeer/opencc-dart) ### Configurations 配置文件 @@ -156,6 +158,13 @@ make build.cmd ``` +### Build with Bazel + +```bash +bazel build //:opencc +bazel test --test_output=all //src/... //data/... //test/... +``` + ### Test 測試 #### Linux & Mac OS X @@ -204,6 +213,8 @@ Example results (from Github CI): ## Projects using OpenCC 使用 OpenCC 的項目 +Please update if your project is using OpenCC. + * [ibus-pinyin](https://github.com/ibus/ibus-pinyin) * [fcitx](https://github.com/fcitx/fcitx) * [rimeime](https://rime.im/) diff --git a/data/config/BUILD.bazel b/data/config/BUILD.bazel new file mode 100644 index 0000000..ea3ee61 --- /dev/null +++ b/data/config/BUILD.bazel @@ -0,0 +1,6 @@ +package(default_visibility = ["//visibility:public"]) + +filegroup( + name = "config", + srcs = glob(["*.json"]), +) diff --git a/data/dictionary/BUILD.bazel b/data/dictionary/BUILD.bazel new file mode 100644 index 0000000..3a1f1aa --- /dev/null +++ b/data/dictionary/BUILD.bazel @@ -0,0 +1,79 @@ +package(default_visibility = ["//visibility:public"]) + +genrule( + name = "merge_TWPhrases", + srcs = [ + "TWPhrasesIT.txt", + "TWPhrasesName.txt", + "TWPhrasesOther.txt", + ], + outs = ["TWPhrases.txt"], + cmd = "$(location //data/scripts:merge) " + + "$(SRCS) $(OUTS)", + tools = ["//data/scripts:merge"], +) + +[ + genrule( + name = "reverse_" + txt, + srcs = [txt + ".txt"], + outs = [txt + "Rev.txt"], + cmd = "$(location //data/scripts:reverse) " + + "$(SRCS) $(OUTS)", + tools = ["//data/scripts:reverse"], + ) + for txt in [ + "TWVariants", + "TWPhrases", + "HKVariants", + "JPVariants", + ] +] + +TEXT_DICTS = glob(["*.txt"]) + [ + "TWPhrases.txt", + "TWVariantsRev.txt", + "TWPhrasesRev.txt", + "HKVariantsRev.txt", + "JPVariantsRev.txt", +] + +[ + genrule( + name = "generate_bin_" + txt[:-4], + srcs = [txt], + outs = [txt.replace(".txt", ".ocd2")], + cmd = "$(location //src/tools:dict_converter) " + + "--input $(location " + txt + ") " + + "--output $(OUTS) " + + "--from text " + + "--to ocd2", + tools = ["//src/tools:dict_converter"], + ) + for txt in TEXT_DICTS +] + +filegroup( + name = "text_dictionaries", + srcs = TEXT_DICTS, +) + +filegroup( + name = "binary_dictionaries", + srcs = [txt.replace(".txt", ".ocd2") for txt in TEXT_DICTS], +) + +cc_test( + name = "dictionary_test", + srcs = ["DictionaryTest.cpp"], + data = [ + ":binary_dictionaries", + ":text_dictionaries", + ], + deps = [ + "//src:lexicon", + "//src:marisa_dict", + "//src:utf8_util", + "@googletest//:gtest_main", + ], +) diff --git a/data/dictionary/DictionaryTest.cpp b/data/dictionary/DictionaryTest.cpp new file mode 100644 index 0000000..7b931c7 --- /dev/null +++ b/data/dictionary/DictionaryTest.cpp @@ -0,0 +1,90 @@ +/* + * Open Chinese Convert + * + * Copyright 2024-2024 Carbo Kuo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "gtest/gtest.h" + +#include "src/Lexicon.hpp" +#include "src/MarisaDict.hpp" +#include "src/UTF8Util.hpp" + +namespace opencc { + +const char* RUNFILE_SUFFIX = ".runfiles/_main"; + +class DictionaryTest : public ::testing::Test, + public ::testing::WithParamInterface { +protected: + static void SetUpTestSuite() { + + std::string program_filename = ::testing::internal::GetArgvs().front(); + size_t suffix_pos = program_filename.find(RUNFILE_SUFFIX); + ASSERT_NE(suffix_pos, std::string::npos); + + runfile_dir_ = + program_filename.substr(0, suffix_pos + strlen(RUNFILE_SUFFIX)); + } + + static std::string runfile_dir_; +}; + +std::string DictionaryTest::runfile_dir_; + +INSTANTIATE_TEST_SUITE_P( + , DictionaryTest, + ::testing::Values("HKVariants", "HKVariantsRevPhrases", + "JPShinjitaiCharacters", "JPShinjitaiPhrases", + "JPVariants", "STCharacters", "STPhrases", "TSCharacters", + "TSPhrases", "TWPhrasesIT", "TWPhrasesName", + "TWPhrasesOther", "TWVariants", "TWVariantsRevPhrases", + "TWPhrases", "TWVariantsRev", "TWPhrasesRev", + "HKVariantsRev", "JPVariantsRev"), + [](const testing::TestParamInfo& info) { + return info.param; + }); + +TEST_P(DictionaryTest, UniqueSortedTest) { + const std::string dictionaryFileName = + runfile_dir_ + "/data/dictionary/" + GetParam() + ".txt"; + FILE* fp = + fopen(UTF8Util::GetPlatformString(dictionaryFileName).c_str(), "rb"); + ASSERT_NE(fp, nullptr); + LexiconPtr lexicon = Lexicon::ParseLexiconFromFile(fp); + EXPECT_TRUE(lexicon->IsUnique()) << GetParam() << " has duplicated keys."; + EXPECT_TRUE(lexicon->IsSorted()) << GetParam() << " is not sorted."; +} + +TEST_P(DictionaryTest, BinaryTest) { + const std::string binaryDictionaryFileName = + runfile_dir_ + "/data/dictionary/" + GetParam() + ".ocd2"; + FILE* fp_bin = fopen( + UTF8Util::GetPlatformString(binaryDictionaryFileName).c_str(), "rb"); + ASSERT_NE(fp_bin, nullptr); + MarisaDictPtr dict = MarisaDict::NewFromFile(fp_bin); + ASSERT_NE(dict, nullptr); + + const std::string textDictionaryFileName = + runfile_dir_ + "/data/dictionary/" + GetParam() + ".txt"; + FILE* fp_txt = + fopen(UTF8Util::GetPlatformString(textDictionaryFileName).c_str(), "rb"); + ASSERT_NE(fp_txt, nullptr); + LexiconPtr txt_lexicon = Lexicon::ParseLexiconFromFile(fp_txt); + + EXPECT_EQ(dict->GetLexicon()->Length(), txt_lexicon->Length()); +} + +} // namespace opencc diff --git a/data/dictionary/JPShinjitaiCharacters.txt b/data/dictionary/JPShinjitaiCharacters.txt index 2b19e66..30220aa 100644 --- a/data/dictionary/JPShinjitaiCharacters.txt +++ b/data/dictionary/JPShinjitaiCharacters.txt @@ -2,6 +2,6 @@ 弁 辨 辯 瓣 辦 弁 御 御 禦 欠 缺 欠 +浜 濱 浜 糸 絲 糸 芸 藝 芸 -浜 濱 浜 diff --git a/data/dictionary/JPVariants.txt b/data/dictionary/JPVariants.txt index 3a8468b..0d9a742 100644 --- a/data/dictionary/JPVariants.txt +++ b/data/dictionary/JPVariants.txt @@ -12,6 +12,7 @@ 兒 児 內 内 兩 両 +冰 氷 剎 刹 剩 剰 劍 剣 @@ -158,12 +159,12 @@ 淨 浄 淺 浅 渴 渇 -潑 溌 溪 渓 溫 温 溼 湿 滯 滞 滿 満 +潑 溌 潛 潜 澀 渋 澤 沢 @@ -203,6 +204,7 @@ 盜 盗 盡 尽 碎 砕 +礪 砺 祕 秘 祿 禄 禦 御 @@ -229,10 +231,10 @@ 縣 県 縱 縦 總 総 -繫 繋 繡 繍 繩 縄 繪 絵 +繫 繋 繼 継 續 続 纔 才 @@ -257,7 +259,7 @@ 舊 旧 舍 舎 荔 茘 -莊 荘 +莊 荘 庄 莖 茎 菸 煙 萊 莱 @@ -312,9 +314,9 @@ 鄉 郷 酢 醋 醉 酔 -醱 醗 醫 医 醬 醤 +醱 醗 釀 醸 釋 釈 鋪 舗 diff --git a/data/dictionary/STCharacters.txt b/data/dictionary/STCharacters.txt index 89edbc9..7347645 100644 --- a/data/dictionary/STCharacters.txt +++ b/data/dictionary/STCharacters.txt @@ -872,7 +872,7 @@ 栏 欄 树 樹 栖 棲 -栗 慄 栗 +栗 栗 慄 样 樣 核 核 覈 栾 欒 diff --git a/data/dictionary/STPhrases.txt b/data/dictionary/STPhrases.txt index 4a61865..fd40137 100644 --- a/data/dictionary/STPhrases.txt +++ b/data/dictionary/STPhrases.txt @@ -2093,6 +2093,7 @@ 丹干 丹干 丹徒布衣 丹徒布衣 丹朱 丹朱 +丹棱 丹稜 丹药 丹藥 为中台 爲中颱 为了 爲了 @@ -3737,7 +3738,7 @@ 伊郁 伊鬱 伊里奇 伊里奇 伊里布 伊里布 -伊里格瑞 伊裏格瑞 +伊里格瑞 伊里格瑞 伊面 伊麪 伍员鞭尸 伍員鞭屍 伍德合金 伍德合金 @@ -4112,6 +4113,7 @@ 余悸 餘悸 余情 餘情 余情未了 餘情未了 +余慈高速 餘慈高速 余户 餘戶 余政宪 余政憲 余数 餘數 @@ -5845,6 +5847,7 @@ 兰摧玉折 蘭摧玉折 兰摧蕙折 蘭摧蕙折 兰棱 蘭棱 +兰溪 蘭谿 兰秋 蘭秋 兰艾同烬 蘭艾同燼 兰艾同焚 蘭艾同焚 @@ -6372,6 +6375,7 @@ 冲帐 沖帳 冲年 沖年 冲床 衝牀 +冲床工 沖牀工 冲开 衝開 冲弱 沖弱 冲得入 衝得入 @@ -6420,7 +6424,6 @@ 冲淡 沖淡 冲澡 沖澡 冲然 衝然 -冲牀工 沖牀工 冲犯 衝犯 冲田 沖田 冲盹 衝盹 @@ -8863,7 +8866,7 @@ 加荣耀于 加榮耀於 加药 加藥 加解密系统 加解密系統 -加达里 加達裏 +加达里 加達里 加速踏板 加速踏板 加里 加里 加里宁 加里寧 @@ -13485,6 +13488,7 @@ 同年而语 同年而語 同庆 同慶 同床 同牀 +同床各梦 同牀各夢 同床异梦 同牀異夢 同庚 同庚 同度 同度 @@ -13641,8 +13645,6 @@ 同爲 同爲 同父 同父 同父异母 同父異母 -同牀各梦 同牀各夢 -同牀异梦 同牀異夢 同班 同班 同班同学 同班同學 同理 同理 @@ -14369,6 +14371,7 @@ 吞刀刮肠 吞刀刮腸 吞咽 吞嚥 吞并 吞併 +吞武里 吞武里 吞烟 吞煙 吞米桑布札 吞米桑布札 吟叹 吟歎 @@ -14875,6 +14878,7 @@ 咸宁 咸寧 咸宁地区 咸寧地區 咸宁市 咸寧市 +咸安 咸安 咸安区 咸安區 咸宜 咸宜 咸度 鹹度 @@ -15021,7 +15025,7 @@ 哈里斯 哈里斯 哈里斯堡 哈里斯堡 哈里札德 哈里札德 -哈里森史密特 哈裏森史密特 +哈里森史密特 哈里森史密特 哈里路亚 哈里路亞 哈里逊 哈里遜 哈里逊福特 哈里遜福特 @@ -16440,7 +16444,7 @@ 培美曲塞 培美曲塞 培育出 培育出 培育出来 培育出來 -培里克利斯 培裏克利斯 +培里克利斯 培里克利斯 培里克里斯 培里克里斯 基于 基於 基克维特 基克維特 @@ -16470,7 +16474,7 @@ 基民党 基民黨 基里兰柯 基里蘭柯 基里巴斯 基里巴斯 -基里巴斯共和国 基裏巴斯共和國 +基里巴斯共和国 基里巴斯共和國 基面 基面 堂分姑娘 堂分姑娘 堂后官 堂後官 @@ -16538,7 +16542,7 @@ 塞药 塞藥 塞莉佛维克 塞莉佛維克 塞车症候群 塞車症候羣 -塞韦里诺 塞韋裏諾 +塞韦里诺 塞韋里諾 填个 填個 填了 填了 填发 填發 @@ -17201,8 +17205,8 @@ 大伙 大夥 大伙人 大夥人 大伙儿 大夥兒 -大余 大餘 -大余县 大餘縣 +大余 大余 +大余县 大余縣 大便干燥 大便乾燥 大修 大修 大修理 大修理 @@ -17584,6 +17588,7 @@ 太初历史 太初歷史 太卜 太卜 太原师范学院 太原師範學院 +太古里 太古里 太后 太后 太咸 太鹹 太好了 太好了 @@ -18603,7 +18608,7 @@ 实录 實錄 实才 實才 实据 實據 -实时技术 實時技術 +实时 即時 实用价值 實用價值 实症 實症 实质面 實質面 @@ -19133,7 +19138,7 @@ 尤克勒斯 尤克勒斯 尤克斯 尤克斯 尤克里斯 尤克里斯 -尤克里里琴 尤克裏裏琴 +尤克里里琴 尤克里里琴 尤基里斯 尤基里斯 尤班克斯 尤班克斯 尤秋兴 尤秋興 @@ -19216,6 +19221,8 @@ 尺幅千里 尺幅千里 尺板 尺板 尺板斗食 尺板斗食 +尼乾子 尼乾子 +尼乾陀 尼乾陀 尼亚加拉瀑布 尼亞加拉瀑布 尼克 尼克 尼克劳斯 尼克勞斯 @@ -19670,6 +19677,7 @@ 左氏春秋 左氏春秋 左邻右舍 左鄰右舍 左邻右里 左鄰右里 +左里 左里 左里克 左里克 左面 左面 巧了 巧了 @@ -20211,8 +20219,8 @@ 帅呆了 帥呆了 帆布 帆布 帆布包 帆布包 +帆布床 帆布牀 帆布椅 帆布椅 -帆布牀 帆布牀 帆布袋 帆布袋 帆布鞋 帆布鞋 帆板 帆板 @@ -20242,7 +20250,7 @@ 希布伦市 希布倫市 希拉克 希拉剋 希拉里 希拉里 -希拉里克林顿 希拉裏克林頓 +希拉里克林顿 希拉里克林頓 希斯仑 希斯崙 希斯莱杰 希斯萊傑 希斯雷杰 希斯雷傑 @@ -20615,7 +20623,7 @@ 干涸 乾涸 干淨 乾淨 干淨俐落 乾淨俐落 -干渠 乾渠 +干渠 幹渠 干渴 乾渴 干湿 乾溼 干湿发 乾溼髮 @@ -21168,6 +21176,7 @@ 庇里牛斯 庇里牛斯 庇里牛斯山 庇里牛斯山 床头柜 牀頭櫃 +床头金尽 牀頭金盡 床席 牀蓆 床板 牀板 序升 序升 @@ -21532,16 +21541,16 @@ 引致 引致 引蛇出洞 引蛇出洞 弗兰克 弗蘭克 -弗洛里斯岛 弗洛裏斯島 -弗罗里达 弗羅裏達 -弗罗里达州 弗羅裏達州 -弗里得里希 弗裏得裏希 +弗洛里斯岛 弗洛里斯島 +弗罗里达 弗羅里達 +弗罗里达州 弗羅里達州 +弗里得里希 弗里得里希 弗里德里希 弗里德里希 弗里敦 弗里敦 弗里斯兰 弗里斯蘭 弗里曼 弗里曼 -弗雷德里克 弗雷德裏克 -弗雷德里克顿 弗雷德裏克頓 +弗雷德里克 弗雷德里克 +弗雷德里克顿 弗雷德里克頓 弘历 弘曆 弘愿 弘願 张三丰 張三丰 @@ -23063,11 +23072,11 @@ 恐韩症 恐韓症 恐高症 恐高症 恐鸡症 恐雞症 -恒生 恒生 -恒指 恒指 恒大 恒大 +恒指 恒指 恒星周期 恆星週期 恒春野百合 恆春野百合 +恒生 恒生 恒言录 恆言錄 恕乏价催 恕乏价催 恙虫 恙蟲 @@ -23564,9 +23573,11 @@ 愿谨 願謹 愿闻其详 願聞其詳 慈云 慈雲 +慈余高速 慈餘高速 慈制 慈制 慈安太后 慈安太后 慈悲喜舍 慈悲喜捨 +慈溪 慈谿 慈禧太后 慈禧太后 慌了 慌了 慌了手脚 慌了手腳 @@ -24534,9 +24545,10 @@ 扳回 扳回 扳回一城 扳回一城 扶了 扶了 -扶余 扶余 -扶余县 扶余縣 +扶余 扶餘 +扶余县 扶餘縣 扶余国 扶餘國 +扶余市 扶餘市 扶出 扶出 扶出去 扶出去 扶出来 扶出來 @@ -24778,10 +24790,10 @@ 折变 折變 折叠 摺疊 折叠为 摺疊爲 +折叠床 摺疊牀 折叠式 摺疊式 折叠扇 摺疊扇 折叠椅 摺疊椅 -折叠牀 摺疊牀 折叠起来 摺疊起來 折台 折檯 折合 摺合 @@ -25915,6 +25927,7 @@ 据实相告 據實相告 据常 據常 据干而窥井底 據榦而窺井底 +据床指麾 據牀指麾 据悉 據悉 据情办理 據情辦理 据我看 據我看 @@ -25924,7 +25937,6 @@ 据有 據有 据此 據此 据点 據點 -据牀指麾 據牀指麾 据理 據理 据理力争 據理力爭 据理而争 據理而爭 @@ -25973,6 +25985,7 @@ 掉出来 掉出來 掉发 掉髮 掉回头 掉回頭 +掉海里 掉海裏 掊克 掊克 掊斗折衡 掊斗折衡 掌柜 掌櫃 @@ -26174,6 +26187,7 @@ 提炼出 提煉出 提甕出汲 提甕出汲 提纯复壮 提純復壯 +提纳里 提納里 提舍尼 提舍尼 插于 插於 插回 插回 @@ -27386,8 +27400,8 @@ 新丰县 新豐縣 新丰酒 新豐酒 新书看板 新書看板 -新余 新餘 -新余市 新餘市 +新余 新余 +新余市 新余市 新修本草 新修本草 新党 新黨 新几內亚 新幾內亞 @@ -28858,7 +28872,7 @@ 木板地 木板地 木板墙 木板牆 木板大鼓 木板大鼓 -木板牀 木板牀 +木板床 木板牀 木板画 木板畫 木柜 木櫃 木梁 木樑 @@ -29392,7 +29406,17 @@ 束矢难折 束矢難折 束身修行 束身修行 束身自修 束身自修 +杠一 杠一 +杠七 杠七 +杠三 杠三 杠上 槓上 +杠九 杠九 +杠二 杠二 +杠五 杠五 +杠八 杠八 +杠六 杠六 +杠四 杠四 +杠增一 杠增一 杠头 槓頭 杠子 槓子 杠杆 槓桿 @@ -31856,8 +31880,8 @@ 沙参 沙蔘 沙发 沙發 沙发垫 沙發墊 +沙发床 沙發牀 沙发椅 沙發椅 -沙发牀 沙發牀 沙坑杆 沙坑桿 沙岩 沙岩 沙弥 沙彌 @@ -32029,9 +32053,11 @@ 沽名干誉 沽名干譽 沽酒当炉 沽酒當爐 沾体 霑體 +沾化 霑化 沾恩 霑恩 沾染控制 沾染控制 沾洽 霑洽 +沾益 霑益 沾衿 霑衿 沿才授职 沿才授職 沿门托钵 沿門托鉢 @@ -32585,6 +32611,7 @@ 浓雾密布 濃霧密佈 浙江天台县 浙江天台縣 浙江师范大学 浙江師範大學 +浚县 濬縣 浥注 浥注 浦发 浦發 浩克 浩克 @@ -32624,8 +32651,6 @@ 浮尸 浮屍 浮托 浮托 浮松 浮鬆 -浮梁 浮樑 -浮梁县 浮樑縣 浮沈 浮沈 浮泛 浮泛 浮游 浮游 @@ -33316,7 +33341,7 @@ 湖南师范大学 湖南師範大學 湖州师范学院 湖州師範學院 湖里 湖裏 -湖里区 湖里區 +湖里区 湖裏區 湖面 湖面 湘帘 湘簾 湘累 湘累 @@ -34297,8 +34322,6 @@ 爷饭娘羹 爺飯孃羹 爹娘 爹孃 爽荡 爽蕩 -牀头柜 牀頭櫃 -牀头金尽 牀頭金盡 片云遮顶 片雲遮頂 片价 片價 片善小才 片善小才 @@ -34917,7 +34940,7 @@ 瓦舍 瓦舍 瓦萨里 瓦薩里 瓦西里 瓦西里 -瓦西里耶维奇 瓦西裏耶維奇 +瓦西里耶维奇 瓦西里耶維奇 瓦解云散 瓦解雲散 瓦达克 瓦達克 瓦里 瓦里 @@ -36072,7 +36095,6 @@ 短了 短了 短于 短於 短价 短價 -短几 短几 短发 短髮 短发性 短發性 短叹 短嘆 @@ -36505,6 +36527,7 @@ 神采飘逸 神采飄逸 神采飞扬 神采飛揚 神采骏发 神采駿發 +神里 神里 神雕 神鵰 神雕侠侣 神鵰俠侶 神雕像 神雕像 @@ -37044,7 +37067,7 @@ 穆克吉 穆克吉 穆巴拉克 穆巴拉克 穆斯坦西里 穆斯坦西里 -穆棱 穆棱 +穆棱 穆稜 穆罕默德历 穆罕默德曆 穆罕默德历史 穆罕默德歷史 穗儿 穗兒 @@ -37287,6 +37310,7 @@ 竹板书 竹板書 竹板歌 竹板歌 竹林之游 竹林之遊 +竹溪 竹谿 竹笋干 竹筍乾 竹签 竹籤 竹篱茅舍 竹籬茅舍 @@ -37340,8 +37364,7 @@ 符合 符合 符合标准 符合標準 符合美国利益 符合美國利益 -符拉迪沃斯托克 符拉迪沃斯託克 -符拉迪沃斯讬克 符拉迪沃斯託克 +符拉迪沃斯托克 符拉迪沃斯托克 符采 符采 笨蛋挂 笨蛋掛 第一个 第一個 @@ -37662,7 +37685,7 @@ 米制 米制 米卤蛋 米滷蛋 米厘米突 米釐米突 -米德尔伯里 米德爾伯裏 +米德尔伯里 米德爾伯里 米格式战斗机 米格式戰鬥機 米纳谷 米納谷 米罗的维纳斯雕像 米羅的維納斯雕像 @@ -37934,7 +37957,7 @@ 索托 索托 索杰纳 索傑納 索福克勒斯 索福克勒斯 -索福克里斯 索福克裏斯 +索福克里斯 索福克里斯 索里亚 索里亞 索里士 索里士 索面 索麪 @@ -38274,6 +38297,7 @@ 织布机 織布機 织席 織蓆 织当访婢 織當訪婢 +织里 織里 织锦回文 織錦回文 终了 終了 终于 終於 @@ -38474,6 +38498,7 @@ 绣阁 繡閣 绣面 繡面 绣鞋 繡鞋 +绥棱 綏稜 绦虫 絛蟲 绦虫纲 絛蟲綱 继天立极 繼天立極 @@ -38610,7 +38635,7 @@ 缕当 縷當 编个 編個 编了 編了 -编余 編余 +编余 編餘 编余人员 編餘人員 编修 編修 编写出 編寫出 @@ -40314,6 +40339,9 @@ 艾里亚森 艾里亞森 艾里斯 艾里斯 艾里森 艾里森 +艾里西 艾里西 +艾里西湖 艾里西湖 +艾里西湖镇 艾里西湖鎮 艾里赛宫 艾里賽宮 节余 節餘 节制 節制 @@ -43105,7 +43133,7 @@ 读万卷书 讀萬卷書 读万卷书行万里路 讀萬卷書行萬里路 读不舍手 讀不捨手 -读书三余 讀書三余 +读书三余 讀書三餘 读书种子 讀書種子 读了 讀了 读出 讀出 @@ -43257,8 +43285,8 @@ 谷垣 谷垣 谷垣祯 谷垣禎 谷垣祯一 谷垣禎一 -谷城 谷城 -谷城县 谷城縣 +谷城 穀城 +谷城县 穀城縣 谷壁 谷壁 谷壳 穀殼 谷子 穀子 @@ -43297,6 +43325,8 @@ 谷贵饿农谷贱伤农 穀貴餓農穀賤傷農 谷道 穀道 谷都 谷都 +谷里 谷里 +谷里街道 谷里街道 谷雨 穀雨 谷风 穀風 谷風 谷食 穀食 @@ -43575,6 +43605,7 @@ 资方代表 資方代表 资治通鉴 資治通鑑 资源回收 資源回收 +资溪 資谿 资管系 資管系 资讯学系 資訊學系 资讯系 資訊系 @@ -44327,6 +44358,7 @@ 辩论术 辯論術 辫发 辮髮 辫穗头 辮穗頭 +辰溪 辰谿 辱游 辱游 边修 邊修 边境冲突 邊境衝突 @@ -44457,6 +44489,7 @@ 近朱近墨 近朱近墨 近水楼台 近水樓臺 近视眼生了瞎子 近視眼生了瞎子 +返佣 返佣 返台 返臺 返吟复吟 返吟復吟 返回 返回 @@ -44902,6 +44935,7 @@ 通联记录 通聯記錄 通讯录 通訊錄 通讯系统 通訊系統 +通车里程 通車里程 通过事后 通過事後 通鉴 通鑑 逛了 逛了 @@ -44999,6 +45033,7 @@ 道合志同 道合志同 道同志合 道同志合 道听涂说 道聽塗說 +道咸 道咸 道尔顿制 道爾頓制 道尽 道盡 道尽涂殚 道盡塗殫 @@ -45612,6 +45647,7 @@ 里特维宁科 里特維寧科 里瓦几亚条约 里瓦幾亞條約 里瓦尔多 裏瓦爾多 +里甲 里甲 里社 里社 里科 里科 里程 里程 @@ -46022,6 +46058,7 @@ 钟祥县 鍾祥縣 钟祥市 鍾祥市 钟福松 鐘福松 +钟离 鍾離 钟纽 鐘紐 钟罩 鐘罩 钟腰 鐘腰 @@ -46394,6 +46431,7 @@ 长寿面 長壽麪 长干巷 長干巷 长干曲 長干曲 +长干里 長干里 长征 長征 长征军 長征軍 长恶不悛 長惡不悛 diff --git a/data/dictionary/TSPhrases.txt b/data/dictionary/TSPhrases.txt index 36e4dec..792a1ca 100644 --- a/data/dictionary/TSPhrases.txt +++ b/data/dictionary/TSPhrases.txt @@ -52,6 +52,7 @@ 大目乾連冥間救母變文 大目乾连冥间救母变文 宫商角徵羽 宫商角徵羽 射覆 射复 +尼乾子 尼乾子 尼乾陀 尼乾陀 幺麼 幺麽 幺麼小丑 幺麽小丑 diff --git a/data/dictionary/TWPhrasesIT.txt b/data/dictionary/TWPhrasesIT.txt index 8d7fe04..8fd94f3 100644 --- a/data/dictionary/TWPhrasesIT.txt +++ b/data/dictionary/TWPhrasesIT.txt @@ -3,12 +3,12 @@ SQL注入 SQL隱碼攻擊 SQL注入攻擊 SQL隱碼攻擊 U盤 隨身碟 三極管 三極體 -下拉列表 下拉選單 +下拉列表 下拉式清單 並行計算 平行計算 中間件 中介軟體 -串口 串列埠 +串口 序列埠 串行 序列 -串行端口 串列埠 +串行端口 序列埠 主引導記錄 主開機記錄 主板 主機板 二極管 二極體 @@ -28,7 +28,7 @@ U盤 隨身碟 保存 儲存 信噪比 訊雜比 信息 資訊 -信息安全 資訊保安 +信息安全 資訊安全 信息技術 資訊科技 信息論 資訊理論 信號 訊號 信號 @@ -74,15 +74,15 @@ U盤 隨身碟 句柄 控制代碼 可視化 視覺化 呼出 撥出 -呼叫轉移 來電轉駁 +呼叫轉移 來電轉接 命令式編程 指令式程式設計 命令行 命令列 -命名空間 名稱空間 +命名空間 名稱空間 名字空間 哈希 雜湊 單片機 微控制器 回調 回撥 固件 韌體 -圖像 影象 +圖像 影像 圖庫 相簿 圖標 圖示 在線 線上 @@ -93,7 +93,7 @@ U盤 隨身碟 場效應管 場效電晶體 壁紙 桌布 壁紙 外置 外接 -外鍵 外來鍵 +外鍵 外部索引鍵 多任務 多工 多態 多型 多線程 多執行緒 @@ -101,7 +101,7 @@ U盤 隨身碟 字段 欄位 字符 字元 字符串 字串 -字符集 字符集 +字符集 字元集 字節 位元組 字體 字型 存儲 儲存 @@ -217,7 +217,7 @@ U盤 隨身碟 溢出 溢位 滾動條 捲軸 演示文稿 簡報 -激光 鐳射 +激光 雷射 激活 啟用 無損壓縮 無失真壓縮 物理內存 實體記憶體 @@ -256,7 +256,7 @@ U盤 隨身碟 空分複用 空間多工 窗口 視窗 端口 埠 -筆記本電腦 膝上型電腦 +筆記本電腦 筆記型電腦 算子 運算元 算法 演算法 範式 正規化 @@ -291,7 +291,7 @@ U盤 隨身碟 菜單 選單 菜單 萬維網 全球資訊網 藍屏 藍色畫面 -藍牙 藍芽 +處理程序 處理程序 虛函數 虛擬函式 虛擬機 虛擬機器 虛擬機器 虛擬機器 diff --git a/data/dictionary/TWPhrasesOther.txt b/data/dictionary/TWPhrasesOther.txt index 22b3b89..7a87ae7 100644 --- a/data/dictionary/TWPhrasesOther.txt +++ b/data/dictionary/TWPhrasesOther.txt @@ -1,3 +1,4 @@ +借記卡 簽帳金融卡 元音 母音 冰棍 冰棒 出租車 計程車 @@ -5,7 +6,9 @@ 塑料 塑膠 奔馳 賓士 奶酪 乳酪 +字節跳動 字節跳動 幾率 機率 +摩爾線程 摩爾線程 方便麪 泡麵 速食麵 李彥宏 李彥宏 概率 機率 @@ -34,4 +37,3 @@ 鐦 鉲 鑥 鎦 黃宏 黃宏 -借記卡 簽帳金融卡 diff --git a/data/scripts/BUILD.bazel b/data/scripts/BUILD.bazel new file mode 100644 index 0000000..d9f718d --- /dev/null +++ b/data/scripts/BUILD.bazel @@ -0,0 +1,23 @@ +load("@rules_python//python:py_binary.bzl", "py_binary") +load("@rules_python//python:py_library.bzl", "py_library") + +package(default_visibility = ["//visibility:public"]) + +py_library( + name = "common", + srcs = ["common.py"], +) + +py_binary( + name = "merge", + srcs = ["merge.py"], + imports = ["."], + deps = [":common"], +) + +py_binary( + name = "reverse", + srcs = ["reverse.py"], + imports = ["."], + deps = [":common"], +) diff --git a/data/scripts/sort_all.py b/data/scripts/sort_all.py index d1ba06c..bd57ae7 100755 --- a/data/scripts/sort_all.py +++ b/data/scripts/sort_all.py @@ -11,7 +11,7 @@ if len(sys.argv) < 2: exit(1) directory = sys.argv[1] -files = glob.glob(directory + "/*") +files = glob.glob(directory + "/*.txt") for filename in files: print(filename) sort_items(filename, filename) diff --git a/deps/darts-clone/darts.h b/deps/darts-clone/darts.h deleted file mode 100644 index d47b0e3..0000000 --- a/deps/darts-clone/darts.h +++ /dev/null @@ -1,1898 +0,0 @@ -#ifndef DARTS_H_ -#define DARTS_H_ - -#include -#include -#include - -#define DARTS_VERSION "0.32" - -// DARTS_THROW() throws a whose message starts with the -// file name and the line number. For example, DARTS_THROW("error message") at -// line 123 of "darts.h" throws a which has a pointer to -// "darts.h:123: exception: error message". The message is available by using -// what() as well as that of . -#define DARTS_INT_TO_STR(value) #value -#define DARTS_LINE_TO_STR(line) DARTS_INT_TO_STR(line) -#define DARTS_LINE_STR DARTS_LINE_TO_STR(__LINE__) -#define DARTS_THROW(msg) throw Darts::Details::Exception( \ - __FILE__ ":" DARTS_LINE_STR ": exception: " msg) - -namespace Darts { - -// The following namespace hides the internal types and classes. -namespace Details { - -// This header assumes that and are 32-bit integer types. -// -// Darts-clone keeps values associated with keys. The type of the values is -// . Note that the values must be positive integers because the -// most significant bit (MSB) of each value is used to represent whether the -// corresponding unit is a leaf or not. Also, the keys are represented by -// sequences of s. is the unsigned type of . -typedef char char_type; -typedef unsigned char uchar_type; -typedef int value_type; - -// The main structure of Darts-clone is an array of s, and the -// unit type is actually a wrapper of . -typedef size_t id_type; - -// is the type of callback functions for reporting the -// progress of building a dictionary. See also build() of . -// The 1st argument receives the progress value and the 2nd argument receives -// the maximum progress value. A usage example is to show the progress -// percentage, 100.0 * (the 1st argument) / (the 2nd argument). -typedef int (*progress_func_type)(std::size_t, std::size_t); - -// is the type of double-array units and it is a wrapper of -// in practice. -class DoubleArrayUnit { - public: - DoubleArrayUnit() : unit_() {} - - // has_leaf() returns whether a leaf unit is immediately derived from the - // unit (true) or not (false). - bool has_leaf() const { - return ((unit_ >> 8) & 1) == 1; - } - // value() returns the value stored in the unit, and thus value() is - // available when and only when the unit is a leaf unit. - value_type value() const { - return static_cast(unit_ & ((1U << 31) - 1)); - } - - // label() returns the label associted with the unit. Note that a leaf unit - // always returns an invalid label. For this feature, leaf unit's label() - // returns an that has the MSB of 1. - id_type label() const { - return unit_ & ((1U << 31) | 0xFF); - } - // offset() returns the offset from the unit to its derived units. - id_type offset() const { - return (unit_ >> 10) << ((unit_ & (1U << 9)) >> 6); - } - - private: - id_type unit_; - - // Copyable. -}; - -// Darts-clone throws an for memory allocation failure, invalid -// arguments or a too large offset. The last case means that there are too many -// keys in the given set of keys. Note that the `msg' of must be a -// constant or static string because an keeps only a pointer to -// that string. -class Exception : public std::exception { - public: - explicit Exception(const char *msg = NULL) throw() : msg_(msg) {} - Exception(const Exception &rhs) throw() : msg_(rhs.msg_) {} - virtual ~Exception() throw() {} - - // overrides what() of . - virtual const char *what() const throw() { - return (msg_ != NULL) ? msg_ : ""; - } - - private: - const char *msg_; - - // Disallows operator=. - Exception &operator=(const Exception &); -}; - -} // namespace Details - -// is the interface of Darts-clone. Note that other -// classes should not be accessed from outside. -// -// has 4 template arguments but only the 3rd one is used as -// the type of values. Note that the given is used only from outside, and -// the internal value type is not changed from . -// In build(), given values are casted from to -// by using static_cast. On the other hand, values are casted from -// to in searching dictionaries. -template -class DoubleArrayImpl { - public: - // Even if this is changed, the internal value type is still - // . Other types, such as 64-bit integer types - // and floating-point number types, should not be used. - typedef T value_type; - // A key is reprenseted by a sequence of s. For example, - // exactMatchSearch() takes a . - typedef Details::char_type key_type; - // In searching dictionaries, the values associated with the matched keys are - // stored into or returned as s. - typedef value_type result_type; - - // enables applications to get the lengths of the matched - // keys in addition to the values. - struct result_pair_type { - value_type value; - std::size_t length; - }; - - // The constructor initializes member variables with 0 and NULLs. - DoubleArrayImpl() : size_(0), array_(NULL), buf_(NULL) {} - // The destructor frees memory allocated for units and then initializes - // member variables with 0 and NULLs. - virtual ~DoubleArrayImpl() { - clear(); - } - - // has 2 kinds of set_result()s. The 1st set_result() is to - // set a value to a . The 2nd set_result() is to set a value and - // a length to a . By using set_result()s, search methods - // can return the 2 kinds of results in the same way. - // Why the set_result()s are non-static? It is for compatibility. - // - // The 1st set_result() takes a length as the 3rd argument but it is not - // used. If a compiler does a good job, codes for getting the length may be - // removed. - void set_result(value_type *result, value_type value, std::size_t) const { - *result = value; - } - // The 2nd set_result() uses both `value' and `length'. - void set_result(result_pair_type *result, - value_type value, std::size_t length) const { - result->value = value; - result->length = length; - } - - // set_array() calls clear() in order to free memory allocated to the old - // array and then sets a new array. This function is useful to set a memory- - // mapped array. Note that the array set by set_array() is not freed in - // clear() and the destructor of . - // set_array() can also set the size of the new array but the size is not - // used in search methods. So it works well even if the 2nd argument is 0 or - // omitted. Remember that size() and total_size() returns 0 in such a case. - void set_array(const void *ptr, std::size_t size = 0) { - clear(); - array_ = static_cast(ptr); - size_ = size; - } - // array() returns a pointer to the array of units. - const void *array() const { - return array_; - } - - // clear() frees memory allocated to units and then initializes member - // variables with 0 and NULLs. Note that clear() does not free memory if the - // array of units was set by set_array(). In such a case, `array_' is not - // NULL and `buf_' is NULL. - void clear() { - size_ = 0; - array_ = NULL; - if (buf_ != NULL) { - delete[] buf_; - buf_ = NULL; - } - } - - // unit_size() returns the size of each unit. The size must be 4 bytes. - std::size_t unit_size() const { - return sizeof(unit_type); - } - // size() returns the number of units. It can be 0 if set_array() is used. - std::size_t size() const { - return size_; - } - // total_size() returns the number of bytes allocated to the array of units. - // It can be 0 if set_array() is used. - std::size_t total_size() const { - return unit_size() * size(); - } - // nonzero_size() exists for compatibility. It always returns the number of - // units because it takes long time to count the number of non-zero units. - std::size_t nonzero_size() const { - return size(); - } - - // build() constructs a dictionary from given key-value pairs. If `lengths' - // is NULL, `keys' is handled as an array of zero-terminated strings. If - // `values' is NULL, the index in `keys' is associated with each key, i.e. - // the ith key has (i - 1) as its value. - // Note that the key-value pairs must be arranged in key order and the values - // must not be negative. Also, if there are duplicate keys, only the first - // pair will be stored in the resultant dictionary. - // `progress_func' is a pointer to a callback function. If it is not NULL, - // it will be called in build() so that the caller can check the progress of - // dictionary construction. For details, please see the definition of - // . - // The return value of build() is 0, and it indicates the success of the - // operation. Otherwise, build() throws a , which is a - // derived class of . - // build() uses another construction algorithm if `values' is not NULL. In - // this case, Darts-clone uses a Directed Acyclic Word Graph (DAWG) instead - // of a trie because a DAWG is likely to be more compact than a trie. - int build(std::size_t num_keys, const key_type * const *keys, - const std::size_t *lengths = NULL, const value_type *values = NULL, - Details::progress_func_type progress_func = NULL); - - // open() reads an array of units from the specified file. And if it goes - // well, the old array will be freed and replaced with the new array read - // from the file. `offset' specifies the number of bytes to be skipped before - // reading an array. `size' specifies the number of bytes to be read from the - // file. If the `size' is 0, the whole file will be read. - // open() returns 0 iff the operation succeeds. Otherwise, it returns a - // non-zero value or throws a . The exception is thrown - // when and only when a memory allocation fails. - int open(const char *file_name, const char *mode = "rb", - std::size_t offset = 0, std::size_t size = 0); - // save() writes the array of units into the specified file. `offset' - // specifies the number of bytes to be skipped before writing the array. - // open() returns 0 iff the operation succeeds. Otherwise, it returns a - // non-zero value. - int save(const char *file_name, const char *mode = "wb", - std::size_t offset = 0) const; - - // The 1st exactMatchSearch() tests whether the given key exists or not, and - // if it exists, its value and length are set to `result'. Otherwise, the - // value and the length of `result' are set to -1 and 0 respectively. - // Note that if `length' is 0, `key' is handled as a zero-terminated string. - // `node_pos' specifies the start position of matching. This argument enables - // the combination of exactMatchSearch() and traverse(). For example, if you - // want to test "xyzA", "xyzBC", and "xyzDE", you can use traverse() to get - // the node position corresponding to "xyz" and then you can use - // exactMatchSearch() to test "A", "BC", and "DE" from that position. - // Note that the length of `result' indicates the length from the `node_pos'. - // In the above example, the lengths are { 1, 2, 2 }, not { 4, 5, 5 }. - template - void exactMatchSearch(const key_type *key, U &result, - std::size_t length = 0, std::size_t node_pos = 0) const { - result = exactMatchSearch(key, length, node_pos); - } - // The 2nd exactMatchSearch() returns a result instead of updating the 2nd - // argument. So, the following exactMatchSearch() has only 3 arguments. - template - inline U exactMatchSearch(const key_type *key, std::size_t length = 0, - std::size_t node_pos = 0) const; - - // commonPrefixSearch() searches for keys which match a prefix of the given - // string. If `length' is 0, `key' is handled as a zero-terminated string. - // The values and the lengths of at most `max_num_results' matched keys are - // stored in `results'. commonPrefixSearch() returns the number of matched - // keys. Note that the return value can be larger than `max_num_results' if - // there are more than `max_num_results' matches. If you want to get all the - // results, allocate more spaces and call commonPrefixSearch() again. - // `node_pos' works as well as in exactMatchSearch(). - template - inline std::size_t commonPrefixSearch(const key_type *key, U *results, - std::size_t max_num_results, std::size_t length = 0, - std::size_t node_pos = 0) const; - - // In Darts-clone, a dictionary is a deterministic finite-state automaton - // (DFA) and traverse() tests transitions on the DFA. The initial state is - // `node_pos' and traverse() chooses transitions labeled key[key_pos], - // key[key_pos + 1], ... in order. If there is not a transition labeled - // key[key_pos + i], traverse() terminates the transitions at that state and - // returns -2. Otherwise, traverse() ends without a termination and returns - // -1 or a nonnegative value, -1 indicates that the final state was not an - // accept state. When a nonnegative value is returned, it is the value - // associated with the final accept state. That is, traverse() returns the - // value associated with the given key if it exists. Note that traverse() - // updates `node_pos' and `key_pos' after each transition. - inline value_type traverse(const key_type *key, std::size_t &node_pos, - std::size_t &key_pos, std::size_t length = 0) const; - - private: - typedef Details::uchar_type uchar_type; - typedef Details::id_type id_type; - typedef Details::DoubleArrayUnit unit_type; - - std::size_t size_; - const unit_type *array_; - unit_type *buf_; - - // Disallows copy and assignment. - DoubleArrayImpl(const DoubleArrayImpl &); - DoubleArrayImpl &operator=(const DoubleArrayImpl &); -}; - -// is the typical instance of . It uses -// as the type of values and it is suitable for most cases. -typedef DoubleArrayImpl DoubleArray; - -// The interface section ends here. For using Darts-clone, there is no need -// to read the remaining section, which gives the implementation of -// Darts-clone. - -// -// Member functions of DoubleArrayImpl (except build()). -// - -template -int DoubleArrayImpl::open(const char *file_name, - const char *mode, std::size_t offset, std::size_t size) { -#ifdef _MSC_VER - std::FILE *file; - if (::fopen_s(&file, file_name, mode) != 0) { - return -1; - } -#else - std::FILE *file = std::fopen(file_name, mode); - if (file == NULL) { - return -1; - } -#endif - - if (size == 0) { - if (std::fseek(file, 0, SEEK_END) != 0) { - std::fclose(file); - return -1; - } - size = std::ftell(file) - offset; - } - - if (std::fseek(file, offset, SEEK_SET) != 0) { - std::fclose(file); - return -1; - } - - size /= unit_size(); - unit_type *buf; - try { - buf = new unit_type[size]; - } catch (const std::bad_alloc &) { - std::fclose(file); - DARTS_THROW("failed to open double-array: std::bad_alloc"); - } - - if (std::fread(buf, unit_size(), size, file) != size) { - std::fclose(file); - delete[] buf; - return -1; - } - std::fclose(file); - - clear(); - - size_ = size; - array_ = buf; - buf_ = buf; - return 0; -} - -template -int DoubleArrayImpl::save(const char *file_name, - const char *mode, std::size_t) const { - if (size() == 0) { - return -1; - } - -#ifdef _MSC_VER - std::FILE *file; - if (::fopen_s(&file, file_name, mode) != 0) { - return -1; - } -#else - std::FILE *file = std::fopen(file_name, mode); - if (file == NULL) { - return -1; - } -#endif - - if (std::fwrite(array_, unit_size(), size(), file) != size()) { - std::fclose(file); - return -1; - } - std::fclose(file); - return 0; -} - -template -template -inline U DoubleArrayImpl::exactMatchSearch(const key_type *key, - std::size_t length, std::size_t node_pos) const { - U result; - set_result(&result, static_cast(-1), 0); - - unit_type unit = array_[node_pos]; - if (length != 0) { - for (std::size_t i = 0; i < length; ++i) { - node_pos ^= unit.offset() ^ static_cast(key[i]); - unit = array_[node_pos]; - if (unit.label() != static_cast(key[i])) { - return result; - } - } - } else { - for ( ; key[length] != '\0'; ++length) { - node_pos ^= unit.offset() ^ static_cast(key[length]); - unit = array_[node_pos]; - if (unit.label() != static_cast(key[length])) { - return result; - } - } - } - - if (!unit.has_leaf()) { - return result; - } - unit = array_[node_pos ^ unit.offset()]; - set_result(&result, static_cast(unit.value()), length); - return result; -} - -template -template -inline std::size_t DoubleArrayImpl::commonPrefixSearch( - const key_type *key, U *results, std::size_t max_num_results, - std::size_t length, std::size_t node_pos) const { - std::size_t num_results = 0; - - unit_type unit = array_[node_pos]; - node_pos ^= unit.offset(); - if (length != 0) { - for (std::size_t i = 0; i < length; ++i) { - node_pos ^= static_cast(key[i]); - unit = array_[node_pos]; - if (unit.label() != static_cast(key[i])) { - return num_results; - } - - node_pos ^= unit.offset(); - if (unit.has_leaf()) { - if (num_results < max_num_results) { - set_result(&results[num_results], static_cast( - array_[node_pos].value()), i + 1); - } - ++num_results; - } - } - } else { - for ( ; key[length] != '\0'; ++length) { - node_pos ^= static_cast(key[length]); - unit = array_[node_pos]; - if (unit.label() != static_cast(key[length])) { - return num_results; - } - - node_pos ^= unit.offset(); - if (unit.has_leaf()) { - if (num_results < max_num_results) { - set_result(&results[num_results], static_cast( - array_[node_pos].value()), length + 1); - } - ++num_results; - } - } - } - - return num_results; -} - -template -inline typename DoubleArrayImpl::value_type -DoubleArrayImpl::traverse(const key_type *key, - std::size_t &node_pos, std::size_t &key_pos, std::size_t length) const { - id_type id = static_cast(node_pos); - unit_type unit = array_[id]; - - if (length != 0) { - for ( ; key_pos < length; ++key_pos) { - id ^= unit.offset() ^ static_cast(key[key_pos]); - unit = array_[id]; - if (unit.label() != static_cast(key[key_pos])) { - return static_cast(-2); - } - node_pos = id; - } - } else { - for ( ; key[key_pos] != '\0'; ++key_pos) { - id ^= unit.offset() ^ static_cast(key[key_pos]); - unit = array_[id]; - if (unit.label() != static_cast(key[key_pos])) { - return static_cast(-2); - } - node_pos = id; - } - } - - if (!unit.has_leaf()) { - return static_cast(-1); - } - unit = array_[id ^ unit.offset()]; - return static_cast(unit.value()); -} - -namespace Details { - -// -// Memory management of array. -// - -template -class AutoArray { - public: - explicit AutoArray(T *array = NULL) : array_(array) {} - ~AutoArray() { - clear(); - } - - const T &operator[](std::size_t id) const { - return array_[id]; - } - T &operator[](std::size_t id) { - return array_[id]; - } - - bool empty() const { - return array_ == NULL; - } - - void clear() { - if (array_ != NULL) { - delete[] array_; - array_ = NULL; - } - } - void swap(AutoArray *array) { - T *temp = array_; - array_ = array->array_; - array->array_ = temp; - } - void reset(T *array = NULL) { - AutoArray(array).swap(this); - } - - private: - T *array_; - - // Disallows copy and assignment. - AutoArray(const AutoArray &); - AutoArray &operator=(const AutoArray &); -}; - -// -// Memory management of resizable array. -// - -template -class AutoPool { - public: - AutoPool() : buf_(), size_(0), capacity_(0) {} - ~AutoPool() { clear(); } - - const T &operator[](std::size_t id) const { - return *(reinterpret_cast(&buf_[0]) + id); - } - T &operator[](std::size_t id) { - return *(reinterpret_cast(&buf_[0]) + id); - } - - bool empty() const { - return size_ == 0; - } - std::size_t size() const { - return size_; - } - - void clear() { - resize(0); - buf_.clear(); - size_ = 0; - capacity_ = 0; - } - - void push_back(const T &value) { - append(value); - } - void pop_back() { - (*this)[--size_].~T(); - } - - void append() { - if (size_ == capacity_) - resize_buf(size_ + 1); - new(&(*this)[size_++]) T; - } - void append(const T &value) { - if (size_ == capacity_) - resize_buf(size_ + 1); - new(&(*this)[size_++]) T(value); - } - - void resize(std::size_t size) { - while (size_ > size) { - (*this)[--size_].~T(); - } - if (size > capacity_) { - resize_buf(size); - } - while (size_ < size) { - new(&(*this)[size_++]) T; - } - } - void resize(std::size_t size, const T &value) { - while (size_ > size) { - (*this)[--size_].~T(); - } - if (size > capacity_) { - resize_buf(size); - } - while (size_ < size) { - new(&(*this)[size_++]) T(value); - } - } - - void reserve(std::size_t size) { - if (size > capacity_) { - resize_buf(size); - } - } - - private: - AutoArray buf_; - std::size_t size_; - std::size_t capacity_; - - // Disallows copy and assignment. - AutoPool(const AutoPool &); - AutoPool &operator=(const AutoPool &); - - void resize_buf(std::size_t size); -}; - -template -void AutoPool::resize_buf(std::size_t size) { - std::size_t capacity; - if (size >= capacity_ * 2) { - capacity = size; - } else { - capacity = 1; - while (capacity < size) { - capacity <<= 1; - } - } - - AutoArray buf; - try { - buf.reset(new char[sizeof(T) * capacity]); - } catch (const std::bad_alloc &) { - DARTS_THROW("failed to resize pool: std::bad_alloc"); - } - - if (size_ > 0) { - T *src = reinterpret_cast(&buf_[0]); - T *dest = reinterpret_cast(&buf[0]); - for (std::size_t i = 0; i < size_; ++i) { - new(&dest[i]) T(src[i]); - src[i].~T(); - } - } - - buf_.swap(&buf); - capacity_ = capacity; -} - -// -// Memory management of stack. -// - -template -class AutoStack { - public: - AutoStack() : pool_() {} - ~AutoStack() { - clear(); - } - - const T &top() const { - return pool_[size() - 1]; - } - T &top() { - return pool_[size() - 1]; - } - - bool empty() const { - return pool_.empty(); - } - std::size_t size() const { - return pool_.size(); - } - - void push(const T &value) { - pool_.push_back(value); - } - void pop() { - pool_.pop_back(); - } - - void clear() { - pool_.clear(); - } - - private: - AutoPool pool_; - - // Disallows copy and assignment. - AutoStack(const AutoStack &); - AutoStack &operator=(const AutoStack &); -}; - -// -// Succinct bit vector. -// - -class BitVector { - public: - BitVector() : units_(), ranks_(), num_ones_(0), size_(0) {} - ~BitVector() { - clear(); - } - - bool operator[](std::size_t id) const { - return (units_[id / UNIT_SIZE] >> (id % UNIT_SIZE) & 1) == 1; - } - - id_type rank(std::size_t id) const { - std::size_t unit_id = id / UNIT_SIZE; - return ranks_[unit_id] + pop_count(units_[unit_id] - & (~0U >> (UNIT_SIZE - (id % UNIT_SIZE) - 1))); - } - - void set(std::size_t id, bool bit) { - if (bit) { - units_[id / UNIT_SIZE] |= 1U << (id % UNIT_SIZE); - } else { - units_[id / UNIT_SIZE] &= ~(1U << (id % UNIT_SIZE)); - } - } - - bool empty() const { - return units_.empty(); - } - std::size_t num_ones() const { - return num_ones_; - } - std::size_t size() const { - return size_; - } - - void append() { - if ((size_ % UNIT_SIZE) == 0) { - units_.append(0); - } - ++size_; - } - void build(); - - void clear() { - units_.clear(); - ranks_.clear(); - } - - private: - enum { UNIT_SIZE = sizeof(id_type) * 8 }; - - AutoPool units_; - AutoArray ranks_; - std::size_t num_ones_; - std::size_t size_; - - // Disallows copy and assignment. - BitVector(const BitVector &); - BitVector &operator=(const BitVector &); - - static id_type pop_count(id_type unit) { - unit = ((unit & 0xAAAAAAAA) >> 1) + (unit & 0x55555555); - unit = ((unit & 0xCCCCCCCC) >> 2) + (unit & 0x33333333); - unit = ((unit >> 4) + unit) & 0x0F0F0F0F; - unit += unit >> 8; - unit += unit >> 16; - return unit & 0xFF; - } -}; - -inline void BitVector::build() { - try { - ranks_.reset(new id_type[units_.size()]); - } catch (const std::bad_alloc &) { - DARTS_THROW("failed to build rank index: std::bad_alloc"); - } - - num_ones_ = 0; - for (std::size_t i = 0; i < units_.size(); ++i) { - ranks_[i] = num_ones_; - num_ones_ += pop_count(units_[i]); - } -} - -// -// Keyset. -// - -template -class Keyset { - public: - Keyset(std::size_t num_keys, const char_type * const *keys, - const std::size_t *lengths, const T *values) : - num_keys_(num_keys), keys_(keys), lengths_(lengths), values_(values) {} - - std::size_t num_keys() const { - return num_keys_; - } - const char_type *keys(std::size_t id) const { - return keys_[id]; - } - uchar_type keys(std::size_t key_id, std::size_t char_id) const { - if (has_lengths() && char_id >= lengths_[key_id]) - return '\0'; - return keys_[key_id][char_id]; - } - - bool has_lengths() const { - return lengths_ != NULL; - } - std::size_t lengths(std::size_t id) const { - if (has_lengths()) { - return lengths_[id]; - } - std::size_t length = 0; - while (keys_[id][length] != '\0') { - ++length; - } - return length; - } - - bool has_values() const { - return values_ != NULL; - } - value_type values(std::size_t id) const { - if (has_values()) { - return static_cast(values_[id]); - } - return static_cast(id); - } - - private: - std::size_t num_keys_; - const char_type * const * keys_; - const std::size_t *lengths_; - const T *values_; - - // Disallows copy and assignment. - Keyset(const Keyset &); - Keyset &operator=(const Keyset &); -}; - -// -// Node of Directed Acyclic Word Graph (DAWG). -// - -class DawgNode { - public: - DawgNode() : child_(0), sibling_(0), label_('\0'), - is_state_(false), has_sibling_(false) {} - - void set_child(id_type child) { - child_ = child; - } - void set_sibling(id_type sibling) { - sibling_ = sibling; - } - void set_value(value_type value) { - child_ = value; - } - void set_label(uchar_type label) { - label_ = label; - } - void set_is_state(bool is_state) { - is_state_ = is_state; - } - void set_has_sibling(bool has_sibling) { - has_sibling_ = has_sibling; - } - - id_type child() const { - return child_; - } - id_type sibling() const { - return sibling_; - } - value_type value() const { - return static_cast(child_); - } - uchar_type label() const { - return label_; - } - bool is_state() const { - return is_state_; - } - bool has_sibling() const { - return has_sibling_; - } - - id_type unit() const { - if (label_ == '\0') { - return (child_ << 1) | (has_sibling_ ? 1 : 0); - } - return (child_ << 2) | (is_state_ ? 2 : 0) | (has_sibling_ ? 1 : 0); - } - - private: - id_type child_; - id_type sibling_; - uchar_type label_; - bool is_state_; - bool has_sibling_; - - // Copyable. -}; - -// -// Fixed unit of Directed Acyclic Word Graph (DAWG). -// - -class DawgUnit { - public: - explicit DawgUnit(id_type unit = 0) : unit_(unit) {} - DawgUnit(const DawgUnit &unit) : unit_(unit.unit_) {} - - DawgUnit &operator=(id_type unit) { - unit_ = unit; - return *this; - } - - id_type unit() const { - return unit_; - } - - id_type child() const { - return unit_ >> 2; - } - bool has_sibling() const { - return (unit_ & 1) == 1; - } - value_type value() const { - return static_cast(unit_ >> 1); - } - bool is_state() const { - return (unit_ & 2) == 2; - } - - private: - id_type unit_; - - // Copyable. -}; - -// -// Directed Acyclic Word Graph (DAWG) builder. -// - -class DawgBuilder { - public: - DawgBuilder() : nodes_(), units_(), labels_(), is_intersections_(), - table_(), node_stack_(), recycle_bin_(), num_states_(0) {} - ~DawgBuilder() { - clear(); - } - - id_type root() const { - return 0; - } - - id_type child(id_type id) const { - return units_[id].child(); - } - id_type sibling(id_type id) const { - return units_[id].has_sibling() ? (id + 1) : 0; - } - int value(id_type id) const { - return units_[id].value(); - } - - bool is_leaf(id_type id) const { - return label(id) == '\0'; - } - uchar_type label(id_type id) const { - return labels_[id]; - } - - bool is_intersection(id_type id) const { - return is_intersections_[id]; - } - id_type intersection_id(id_type id) const { - return is_intersections_.rank(id) - 1; - } - - std::size_t num_intersections() const { - return is_intersections_.num_ones(); - } - - std::size_t size() const { - return units_.size(); - } - - void init(); - void finish(); - - void insert(const char *key, std::size_t length, value_type value); - - void clear(); - - private: - enum { INITIAL_TABLE_SIZE = 1 << 10 }; - - AutoPool nodes_; - AutoPool units_; - AutoPool labels_; - BitVector is_intersections_; - AutoPool table_; - AutoStack node_stack_; - AutoStack recycle_bin_; - std::size_t num_states_; - - // Disallows copy and assignment. - DawgBuilder(const DawgBuilder &); - DawgBuilder &operator=(const DawgBuilder &); - - void flush(id_type id); - - void expand_table(); - - id_type find_unit(id_type id, id_type *hash_id) const; - id_type find_node(id_type node_id, id_type *hash_id) const; - - bool are_equal(id_type node_id, id_type unit_id) const; - - id_type hash_unit(id_type id) const; - id_type hash_node(id_type id) const; - - id_type append_node(); - id_type append_unit(); - - void free_node(id_type id) { - recycle_bin_.push(id); - } - - static id_type hash(id_type key) { - key = ~key + (key << 15); // key = (key << 15) - key - 1; - key = key ^ (key >> 12); - key = key + (key << 2); - key = key ^ (key >> 4); - key = key * 2057; // key = (key + (key << 3)) + (key << 11); - key = key ^ (key >> 16); - return key; - } -}; - -inline void DawgBuilder::init() { - table_.resize(INITIAL_TABLE_SIZE, 0); - - append_node(); - append_unit(); - - num_states_ = 1; - - nodes_[0].set_label(0xFF); - node_stack_.push(0); -} - -inline void DawgBuilder::finish() { - flush(0); - - units_[0] = nodes_[0].unit(); - labels_[0] = nodes_[0].label(); - - nodes_.clear(); - table_.clear(); - node_stack_.clear(); - recycle_bin_.clear(); - - is_intersections_.build(); -} - -inline void DawgBuilder::insert(const char *key, std::size_t length, - value_type value) { - if (value < 0) { - DARTS_THROW("failed to insert key: negative value"); - } else if (length == 0) { - DARTS_THROW("failed to insert key: zero-length key"); - } - - id_type id = 0; - std::size_t key_pos = 0; - - for ( ; key_pos <= length; ++key_pos) { - id_type child_id = nodes_[id].child(); - if (child_id == 0) { - break; - } - - uchar_type key_label = static_cast(key[key_pos]); - if (key_pos < length && key_label == '\0') { - DARTS_THROW("failed to insert key: invalid null character"); - } - - uchar_type unit_label = nodes_[child_id].label(); - if (key_label < unit_label) { - DARTS_THROW("failed to insert key: wrong key order"); - } else if (key_label > unit_label) { - nodes_[child_id].set_has_sibling(true); - flush(child_id); - break; - } - id = child_id; - } - - if (key_pos > length) { - return; - } - - for ( ; key_pos <= length; ++key_pos) { - uchar_type key_label = static_cast( - (key_pos < length) ? key[key_pos] : '\0'); - id_type child_id = append_node(); - - if (nodes_[id].child() == 0) { - nodes_[child_id].set_is_state(true); - } - nodes_[child_id].set_sibling(nodes_[id].child()); - nodes_[child_id].set_label(key_label); - nodes_[id].set_child(child_id); - node_stack_.push(child_id); - - id = child_id; - } - nodes_[id].set_value(value); -} - -inline void DawgBuilder::clear() { - nodes_.clear(); - units_.clear(); - labels_.clear(); - is_intersections_.clear(); - table_.clear(); - node_stack_.clear(); - recycle_bin_.clear(); - num_states_ = 0; -} - -inline void DawgBuilder::flush(id_type id) { - while (node_stack_.top() != id) { - id_type node_id = node_stack_.top(); - node_stack_.pop(); - - if (num_states_ >= table_.size() - (table_.size() >> 2)) { - expand_table(); - } - - id_type num_siblings = 0; - for (id_type i = node_id; i != 0; i = nodes_[i].sibling()) { - ++num_siblings; - } - - id_type hash_id; - id_type match_id = find_node(node_id, &hash_id); - if (match_id != 0) { - is_intersections_.set(match_id, true); - } else { - id_type unit_id = 0; - for (id_type i = 0; i < num_siblings; ++i) { - unit_id = append_unit(); - } - for (id_type i = node_id; i != 0; i = nodes_[i].sibling()) { - units_[unit_id] = nodes_[i].unit(); - labels_[unit_id] = nodes_[i].label(); - --unit_id; - } - match_id = unit_id + 1; - table_[hash_id] = match_id; - ++num_states_; - } - - for (id_type i = node_id, next; i != 0; i = next) { - next = nodes_[i].sibling(); - free_node(i); - } - - nodes_[node_stack_.top()].set_child(match_id); - } - node_stack_.pop(); -} - -inline void DawgBuilder::expand_table() { - std::size_t table_size = table_.size() << 1; - table_.clear(); - table_.resize(table_size, 0); - - for (std::size_t i = 1; i < units_.size(); ++i) { - id_type id = static_cast(i); - if (labels_[id] == '\0' || units_[id].is_state()) { - id_type hash_id; - find_unit(id, &hash_id); - table_[hash_id] = id; - } - } -} - -inline id_type DawgBuilder::find_unit(id_type id, id_type *hash_id) const { - *hash_id = hash_unit(id) % table_.size(); - for ( ; ; *hash_id = (*hash_id + 1) % table_.size()) { - id_type unit_id = table_[*hash_id]; - if (unit_id == 0) { - break; - } - - // There must not be the same unit. - } - return 0; -} - -inline id_type DawgBuilder::find_node(id_type node_id, - id_type *hash_id) const { - *hash_id = hash_node(node_id) % table_.size(); - for ( ; ; *hash_id = (*hash_id + 1) % table_.size()) { - id_type unit_id = table_[*hash_id]; - if (unit_id == 0) { - break; - } - - if (are_equal(node_id, unit_id)) { - return unit_id; - } - } - return 0; -} - -inline bool DawgBuilder::are_equal(id_type node_id, id_type unit_id) const { - for (id_type i = nodes_[node_id].sibling(); i != 0; - i = nodes_[i].sibling()) { - if (units_[unit_id].has_sibling() == false) { - return false; - } - ++unit_id; - } - if (units_[unit_id].has_sibling() == true) { - return false; - } - - for (id_type i = node_id; i != 0; i = nodes_[i].sibling(), --unit_id) { - if (nodes_[i].unit() != units_[unit_id].unit() || - nodes_[i].label() != labels_[unit_id]) { - return false; - } - } - return true; -} - -inline id_type DawgBuilder::hash_unit(id_type id) const { - id_type hash_value = 0; - for ( ; id != 0; ++id) { - id_type unit = units_[id].unit(); - uchar_type label = labels_[id]; - hash_value ^= hash((label << 24) ^ unit); - - if (units_[id].has_sibling() == false) { - break; - } - } - return hash_value; -} - -inline id_type DawgBuilder::hash_node(id_type id) const { - id_type hash_value = 0; - for ( ; id != 0; id = nodes_[id].sibling()) { - id_type unit = nodes_[id].unit(); - uchar_type label = nodes_[id].label(); - hash_value ^= hash((label << 24) ^ unit); - } - return hash_value; -} - -inline id_type DawgBuilder::append_unit() { - is_intersections_.append(); - units_.append(); - labels_.append(); - - return static_cast(is_intersections_.size() - 1); -} - -inline id_type DawgBuilder::append_node() { - id_type id; - if (recycle_bin_.empty()) { - id = static_cast(nodes_.size()); - nodes_.append(); - } else { - id = recycle_bin_.top(); - nodes_[id] = DawgNode(); - recycle_bin_.pop(); - } - return id; -} - -// -// Unit of double-array builder. -// - -class DoubleArrayBuilderUnit { - public: - DoubleArrayBuilderUnit() : unit_(0) {} - - void set_has_leaf(bool has_leaf) { - if (has_leaf) { - unit_ |= 1U << 8; - } else { - unit_ &= ~(1U << 8); - } - } - void set_value(value_type value) { - unit_ = value | (1U << 31); - } - void set_label(uchar_type label) { - unit_ = (unit_ & ~0xFFU) | label; - } - void set_offset(id_type offset) { - if (offset >= 1U << 29) { - DARTS_THROW("failed to modify unit: too large offset"); - } - unit_ &= (1U << 31) | (1U << 8) | 0xFF; - if (offset < 1U << 21) { - unit_ |= (offset << 10); - } else { - unit_ |= (offset << 2) | (1U << 9); - } - } - - private: - id_type unit_; - - // Copyable. -}; - -// -// Extra unit of double-array builder. -// - -class DoubleArrayBuilderExtraUnit { - public: - DoubleArrayBuilderExtraUnit() : prev_(0), next_(0), - is_fixed_(false), is_used_(false) {} - - void set_prev(id_type prev) { - prev_ = prev; - } - void set_next(id_type next) { - next_ = next; - } - void set_is_fixed(bool is_fixed) { - is_fixed_ = is_fixed; - } - void set_is_used(bool is_used) { - is_used_ = is_used; - } - - id_type prev() const { - return prev_; - } - id_type next() const { - return next_; - } - bool is_fixed() const { - return is_fixed_; - } - bool is_used() const { - return is_used_; - } - - private: - id_type prev_; - id_type next_; - bool is_fixed_; - bool is_used_; - - // Copyable. -}; - -// -// DAWG -> double-array converter. -// - -class DoubleArrayBuilder { - public: - explicit DoubleArrayBuilder(progress_func_type progress_func) - : progress_func_(progress_func), units_(), extras_(), labels_(), - table_(), extras_head_(0) {} - ~DoubleArrayBuilder() { - clear(); - } - - template - void build(const Keyset &keyset); - void copy(std::size_t *size_ptr, DoubleArrayUnit **buf_ptr) const; - - void clear(); - - private: - enum { BLOCK_SIZE = 256 }; - enum { NUM_EXTRA_BLOCKS = 16 }; - enum { NUM_EXTRAS = BLOCK_SIZE * NUM_EXTRA_BLOCKS }; - - enum { UPPER_MASK = 0xFF << 21 }; - enum { LOWER_MASK = 0xFF }; - - typedef DoubleArrayBuilderUnit unit_type; - typedef DoubleArrayBuilderExtraUnit extra_type; - - progress_func_type progress_func_; - AutoPool units_; - AutoArray extras_; - AutoPool labels_; - AutoArray table_; - id_type extras_head_; - - // Disallows copy and assignment. - DoubleArrayBuilder(const DoubleArrayBuilder &); - DoubleArrayBuilder &operator=(const DoubleArrayBuilder &); - - std::size_t num_blocks() const { - return units_.size() / BLOCK_SIZE; - } - - const extra_type &extras(id_type id) const { - return extras_[id % NUM_EXTRAS]; - } - extra_type &extras(id_type id) { - return extras_[id % NUM_EXTRAS]; - } - - template - void build_dawg(const Keyset &keyset, DawgBuilder *dawg_builder); - void build_from_dawg(const DawgBuilder &dawg); - void build_from_dawg(const DawgBuilder &dawg, - id_type dawg_id, id_type dic_id); - id_type arrange_from_dawg(const DawgBuilder &dawg, - id_type dawg_id, id_type dic_id); - - template - void build_from_keyset(const Keyset &keyset); - template - void build_from_keyset(const Keyset &keyset, std::size_t begin, - std::size_t end, std::size_t depth, id_type dic_id); - template - id_type arrange_from_keyset(const Keyset &keyset, std::size_t begin, - std::size_t end, std::size_t depth, id_type dic_id); - - id_type find_valid_offset(id_type id) const; - bool is_valid_offset(id_type id, id_type offset) const; - - void reserve_id(id_type id); - void expand_units(); - - void fix_all_blocks(); - void fix_block(id_type block_id); -}; - -template -void DoubleArrayBuilder::build(const Keyset &keyset) { - if (keyset.has_values()) { - Details::DawgBuilder dawg_builder; - build_dawg(keyset, &dawg_builder); - build_from_dawg(dawg_builder); - dawg_builder.clear(); - } else { - build_from_keyset(keyset); - } -} - -inline void DoubleArrayBuilder::copy(std::size_t *size_ptr, - DoubleArrayUnit **buf_ptr) const { - if (size_ptr != NULL) { - *size_ptr = units_.size(); - } - if (buf_ptr != NULL) { - *buf_ptr = new DoubleArrayUnit[units_.size()]; - unit_type *units = reinterpret_cast(*buf_ptr); - for (std::size_t i = 0; i < units_.size(); ++i) { - units[i] = units_[i]; - } - } -} - -inline void DoubleArrayBuilder::clear() { - units_.clear(); - extras_.clear(); - labels_.clear(); - table_.clear(); - extras_head_ = 0; -} - -template -void DoubleArrayBuilder::build_dawg(const Keyset &keyset, - DawgBuilder *dawg_builder) { - dawg_builder->init(); - for (std::size_t i = 0; i < keyset.num_keys(); ++i) { - dawg_builder->insert(keyset.keys(i), keyset.lengths(i), keyset.values(i)); - if (progress_func_ != NULL) { - progress_func_(i + 1, keyset.num_keys() + 1); - } - } - dawg_builder->finish(); -} - -inline void DoubleArrayBuilder::build_from_dawg(const DawgBuilder &dawg) { - std::size_t num_units = 1; - while (num_units < dawg.size()) { - num_units <<= 1; - } - units_.reserve(num_units); - - table_.reset(new id_type[dawg.num_intersections()]); - for (std::size_t i = 0; i < dawg.num_intersections(); ++i) { - table_[i] = 0; - } - - extras_.reset(new extra_type[NUM_EXTRAS]); - - reserve_id(0); - extras(0).set_is_used(true); - units_[0].set_offset(1); - units_[0].set_label('\0'); - - if (dawg.child(dawg.root()) != 0) { - build_from_dawg(dawg, dawg.root(), 0); - } - - fix_all_blocks(); - - extras_.clear(); - labels_.clear(); - table_.clear(); -} - -inline void DoubleArrayBuilder::build_from_dawg(const DawgBuilder &dawg, - id_type dawg_id, id_type dic_id) { - id_type dawg_child_id = dawg.child(dawg_id); - if (dawg.is_intersection(dawg_child_id)) { - id_type intersection_id = dawg.intersection_id(dawg_child_id); - id_type offset = table_[intersection_id]; - if (offset != 0) { - offset ^= dic_id; - if (!(offset & UPPER_MASK) || !(offset & LOWER_MASK)) { - if (dawg.is_leaf(dawg_child_id)) { - units_[dic_id].set_has_leaf(true); - } - units_[dic_id].set_offset(offset); - return; - } - } - } - - id_type offset = arrange_from_dawg(dawg, dawg_id, dic_id); - if (dawg.is_intersection(dawg_child_id)) { - table_[dawg.intersection_id(dawg_child_id)] = offset; - } - - do { - uchar_type child_label = dawg.label(dawg_child_id); - id_type dic_child_id = offset ^ child_label; - if (child_label != '\0') { - build_from_dawg(dawg, dawg_child_id, dic_child_id); - } - dawg_child_id = dawg.sibling(dawg_child_id); - } while (dawg_child_id != 0); -} - -inline id_type DoubleArrayBuilder::arrange_from_dawg(const DawgBuilder &dawg, - id_type dawg_id, id_type dic_id) { - labels_.resize(0); - - id_type dawg_child_id = dawg.child(dawg_id); - while (dawg_child_id != 0) { - labels_.append(dawg.label(dawg_child_id)); - dawg_child_id = dawg.sibling(dawg_child_id); - } - - id_type offset = find_valid_offset(dic_id); - units_[dic_id].set_offset(dic_id ^ offset); - - dawg_child_id = dawg.child(dawg_id); - for (std::size_t i = 0; i < labels_.size(); ++i) { - id_type dic_child_id = offset ^ labels_[i]; - reserve_id(dic_child_id); - - if (dawg.is_leaf(dawg_child_id)) { - units_[dic_id].set_has_leaf(true); - units_[dic_child_id].set_value(dawg.value(dawg_child_id)); - } else { - units_[dic_child_id].set_label(labels_[i]); - } - - dawg_child_id = dawg.sibling(dawg_child_id); - } - extras(offset).set_is_used(true); - - return offset; -} - -template -void DoubleArrayBuilder::build_from_keyset(const Keyset &keyset) { - std::size_t num_units = 1; - while (num_units < keyset.num_keys()) { - num_units <<= 1; - } - units_.reserve(num_units); - - extras_.reset(new extra_type[NUM_EXTRAS]); - - reserve_id(0); - extras(0).set_is_used(true); - units_[0].set_offset(1); - units_[0].set_label('\0'); - - if (keyset.num_keys() > 0) { - build_from_keyset(keyset, 0, keyset.num_keys(), 0, 0); - } - - fix_all_blocks(); - - extras_.clear(); - labels_.clear(); -} - -template -void DoubleArrayBuilder::build_from_keyset(const Keyset &keyset, - std::size_t begin, std::size_t end, std::size_t depth, id_type dic_id) { - id_type offset = arrange_from_keyset(keyset, begin, end, depth, dic_id); - - while (begin < end) { - if (keyset.keys(begin, depth) != '\0') { - break; - } - ++begin; - } - if (begin == end) { - return; - } - - std::size_t last_begin = begin; - uchar_type last_label = keyset.keys(begin, depth); - while (++begin < end) { - uchar_type label = keyset.keys(begin, depth); - if (label != last_label) { - build_from_keyset(keyset, last_begin, begin, - depth + 1, offset ^ last_label); - last_begin = begin; - last_label = keyset.keys(begin, depth); - } - } - build_from_keyset(keyset, last_begin, end, depth + 1, offset ^ last_label); -} - -template -id_type DoubleArrayBuilder::arrange_from_keyset(const Keyset &keyset, - std::size_t begin, std::size_t end, std::size_t depth, id_type dic_id) { - labels_.resize(0); - - value_type value = -1; - for (std::size_t i = begin; i < end; ++i) { - uchar_type label = keyset.keys(i, depth); - if (label == '\0') { - if (keyset.has_lengths() && depth < keyset.lengths(i)) { - DARTS_THROW("failed to build double-array: " - "invalid null character"); - } else if (keyset.values(i) < 0) { - DARTS_THROW("failed to build double-array: negative value"); - } - - if (value == -1) { - value = keyset.values(i); - } - if (progress_func_ != NULL) { - progress_func_(i + 1, keyset.num_keys() + 1); - } - } - - if (labels_.empty()) { - labels_.append(label); - } else if (label != labels_[labels_.size() - 1]) { - if (label < labels_[labels_.size() - 1]) { - DARTS_THROW("failed to build double-array: wrong key order"); - } - labels_.append(label); - } - } - - id_type offset = find_valid_offset(dic_id); - units_[dic_id].set_offset(dic_id ^ offset); - - for (std::size_t i = 0; i < labels_.size(); ++i) { - id_type dic_child_id = offset ^ labels_[i]; - reserve_id(dic_child_id); - if (labels_[i] == '\0') { - units_[dic_id].set_has_leaf(true); - units_[dic_child_id].set_value(value); - } else { - units_[dic_child_id].set_label(labels_[i]); - } - } - extras(offset).set_is_used(true); - - return offset; -} - -inline id_type DoubleArrayBuilder::find_valid_offset(id_type id) const { - if (extras_head_ >= units_.size()) { - return units_.size() | (id & LOWER_MASK); - } - - id_type unfixed_id = extras_head_; - do { - id_type offset = unfixed_id ^ labels_[0]; - if (is_valid_offset(id, offset)) { - return offset; - } - unfixed_id = extras(unfixed_id).next(); - } while (unfixed_id != extras_head_); - - return units_.size() | (id & LOWER_MASK); -} - -inline bool DoubleArrayBuilder::is_valid_offset(id_type id, - id_type offset) const { - if (extras(offset).is_used()) { - return false; - } - - id_type rel_offset = id ^ offset; - if ((rel_offset & LOWER_MASK) && (rel_offset & UPPER_MASK)) { - return false; - } - - for (std::size_t i = 1; i < labels_.size(); ++i) { - if (extras(offset ^ labels_[i]).is_fixed()) { - return false; - } - } - - return true; -} - -inline void DoubleArrayBuilder::reserve_id(id_type id) { - if (id >= units_.size()) { - expand_units(); - } - - if (id == extras_head_) { - extras_head_ = extras(id).next(); - if (extras_head_ == id) { - extras_head_ = units_.size(); - } - } - extras(extras(id).prev()).set_next(extras(id).next()); - extras(extras(id).next()).set_prev(extras(id).prev()); - extras(id).set_is_fixed(true); -} - -inline void DoubleArrayBuilder::expand_units() { - id_type src_num_units = units_.size(); - id_type src_num_blocks = num_blocks(); - - id_type dest_num_units = src_num_units + BLOCK_SIZE; - id_type dest_num_blocks = src_num_blocks + 1; - - if (dest_num_blocks > NUM_EXTRA_BLOCKS) { - fix_block(src_num_blocks - NUM_EXTRA_BLOCKS); - } - - units_.resize(dest_num_units); - - if (dest_num_blocks > NUM_EXTRA_BLOCKS) { - for (std::size_t id = src_num_units; id < dest_num_units; ++id) { - extras(id).set_is_used(false); - extras(id).set_is_fixed(false); - } - } - - for (id_type i = src_num_units + 1; i < dest_num_units; ++i) { - extras(i - 1).set_next(i); - extras(i).set_prev(i - 1); - } - - extras(src_num_units).set_prev(dest_num_units - 1); - extras(dest_num_units - 1).set_next(src_num_units); - - extras(src_num_units).set_prev(extras(extras_head_).prev()); - extras(dest_num_units - 1).set_next(extras_head_); - - extras(extras(extras_head_).prev()).set_next(src_num_units); - extras(extras_head_).set_prev(dest_num_units - 1); -} - -inline void DoubleArrayBuilder::fix_all_blocks() { - id_type begin = 0; - if (num_blocks() > NUM_EXTRA_BLOCKS) { - begin = num_blocks() - NUM_EXTRA_BLOCKS; - } - id_type end = num_blocks(); - - for (id_type block_id = begin; block_id != end; ++block_id) { - fix_block(block_id); - } -} - -inline void DoubleArrayBuilder::fix_block(id_type block_id) { - id_type begin = block_id * BLOCK_SIZE; - id_type end = begin + BLOCK_SIZE; - - id_type unused_offset = 0; - for (id_type offset = begin; offset != end; ++offset) { - if (!extras(offset).is_used()) { - unused_offset = offset; - break; - } - } - - for (id_type id = begin; id != end; ++id) { - if (!extras(id).is_fixed()) { - reserve_id(id); - units_[id].set_label(static_cast(id ^ unused_offset)); - } - } -} - -} // namespace Details - -// -// Member function build() of DoubleArrayImpl. -// - -template -int DoubleArrayImpl::build(std::size_t num_keys, - const key_type * const *keys, const std::size_t *lengths, - const value_type *values, Details::progress_func_type progress_func) { - Details::Keyset keyset(num_keys, keys, lengths, values); - - Details::DoubleArrayBuilder builder(progress_func); - builder.build(keyset); - - std::size_t size = 0; - unit_type *buf = NULL; - builder.copy(&size, &buf); - - clear(); - - size_ = size; - array_ = buf; - buf_ = buf; - - if (progress_func != NULL) { - progress_func(num_keys + 1, num_keys + 1); - } - - return 0; -} - -} // namespace Darts - -#undef DARTS_INT_TO_STR -#undef DARTS_LINE_TO_STR -#undef DARTS_LINE_STR -#undef DARTS_THROW - -#endif // DARTS_H_ diff --git a/node/global.gypi b/node/global.gypi index 7c50dd4..523e649 100644 --- a/node/global.gypi +++ b/node/global.gypi @@ -1,30 +1,50 @@ { "variables": { - "opencc_version": "1.1.3" + "opencc_version": "1.1.8" }, "target_defaults": { "defines": [ "VERSION=\"<(opencc_version)\"" ], "conditions": [ - ["OS=='linux'", { - "cflags": [ - "-std=c++0x" - ], - "cflags!": ["-fno-exceptions"], - "cflags_cc!": ["-fno-exceptions"], - }], - ["OS=='mac'", { - 'xcode_settings': { - 'GCC_ENABLE_CPP_EXCEPTIONS': 'YES', - 'MACOSX_DEPLOYMENT_TARGET': '10.7', - 'OTHER_CPLUSPLUSFLAGS': ["-std=c++14", "-stdlib=libc++"], - 'OTHER_LDFLAGS': ["-stdlib=libc++"] + [ + "OS==\"linux\"", + { + "cflags": [ + "-std=c++17" + ], + "cflags!": [ + "-fno-exceptions" + ], + "cflags_cc!": [ + "-fno-exceptions" + ] } - }], - ["OS=='win'", { - "defines": ["Opencc_BUILT_AS_STATIC"] - }] + ], + [ + "OS==\"mac\"", + { + "xcode_settings": { + "GCC_ENABLE_CPP_EXCEPTIONS": "YES", + "MACOSX_DEPLOYMENT_TARGET": "10.7", + "OTHER_CPLUSPLUSFLAGS": [ + "-std=c++17", + "-stdlib=libc++" + ], + "OTHER_LDFLAGS": [ + "-stdlib=libc++" + ] + } + } + ], + [ + "OS==\"win\"", + { + "defines": [ + "Opencc_BUILT_AS_STATIC" + ] + } + ] ] } -} +} \ No newline at end of file diff --git a/node/node_opencc.gypi b/node/node_opencc.gypi index d468247..f2af695 100644 --- a/node/node_opencc.gypi +++ b/node/node_opencc.gypi @@ -1,17 +1,19 @@ { - "targets": [{ - "target_name": "opencc", - "sources": [ - "../node/marisa.cc", - "../node/opencc.cc", - ], - "include_dirs": [ - "../node", - "../src", - "../deps/rapidjson-1.1.0", - "../deps/marisa-0.2.6/include", - "../deps/marisa-0.2.6/lib", - "= 8.0.0" } }, + "node_modules/@mapbox/node-pre-gyp": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz", + "integrity": "sha512-Yhlar6v9WQgUp/He7BdgzOz8lqMQ8sU+jkCq7Wx8Myc5YFJLbEe7lgui/V7G1qB1DJykHSGwreceSaD60Y0PUQ==", + "dependencies": { + "detect-libc": "^2.0.0", + "https-proxy-agent": "^5.0.0", + "make-dir": "^3.1.0", + "node-fetch": "^2.6.7", + "nopt": "^5.0.0", + "npmlog": "^5.0.1", + "rimraf": "^3.0.2", + "semver": "^7.3.5", + "tar": "^6.1.11" + }, + "bin": { + "node-pre-gyp": "bin/node-pre-gyp" + } + }, "node_modules/@octokit/auth-token": { - "version": "2.5.0", - "resolved": "https://registry.npmjs.org/@octokit/auth-token/-/auth-token-2.5.0.tgz", - "integrity": "sha512-r5FVUJCOLl19AxiuZD2VRZ/ORjp/4IN98Of6YJoJOkY75CIBuYfmiNHGrDwXr+aLGG55igl9QrxX3hbiXlLb+g==", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@octokit/auth-token/-/auth-token-4.0.0.tgz", + "integrity": "sha512-tY/msAuJo6ARbK6SPIxZrPBms3xPbfwBrulZe0Wtr/DIY9lje2HeV1uoebShn6mx7SjCHif6EjMvoREj+gZ+SA==", "dev": true, - "dependencies": { - "@octokit/types": "^6.0.3" + "engines": { + "node": ">= 18" } }, "node_modules/@octokit/core": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/@octokit/core/-/core-3.6.0.tgz", - "integrity": "sha512-7RKRKuA4xTjMhY+eG3jthb3hlZCsOwg3rztWh75Xc+ShDWOfDDATWbeZpAHBNRpm4Tv9WgBMOy1zEJYXG6NJ7Q==", + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/@octokit/core/-/core-5.2.0.tgz", + "integrity": "sha512-1LFfa/qnMQvEOAdzlQymH0ulepxbxnCYAKJZfMci/5XJyIHWgEYnDmgnKakbTh7CH2tFQ5O60oYDvns4i9RAIg==", "dev": true, "dependencies": { - "@octokit/auth-token": "^2.4.4", - "@octokit/graphql": "^4.5.8", - "@octokit/request": "^5.6.3", - "@octokit/request-error": "^2.0.5", - "@octokit/types": "^6.0.3", + "@octokit/auth-token": "^4.0.0", + "@octokit/graphql": "^7.1.0", + "@octokit/request": "^8.3.1", + "@octokit/request-error": "^5.1.0", + "@octokit/types": "^13.0.0", "before-after-hook": "^2.2.0", "universal-user-agent": "^6.0.0" + }, + "engines": { + "node": ">= 18" } }, "node_modules/@octokit/endpoint": { - "version": "6.0.12", - "resolved": "https://registry.npmjs.org/@octokit/endpoint/-/endpoint-6.0.12.tgz", - "integrity": "sha512-lF3puPwkQWGfkMClXb4k/eUT/nZKQfxinRWJrdZaJO85Dqwo/G0yOC434Jr2ojwafWJMYqFGFa5ms4jJUgujdA==", + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/@octokit/endpoint/-/endpoint-9.0.5.tgz", + "integrity": "sha512-ekqR4/+PCLkEBF6qgj8WqJfvDq65RH85OAgrtnVp1mSxaXF03u2xW/hUdweGS5654IlC0wkNYC18Z50tSYTAFw==", "dev": true, "dependencies": { - "@octokit/types": "^6.0.3", - "is-plain-object": "^5.0.0", + "@octokit/types": "^13.1.0", "universal-user-agent": "^6.0.0" + }, + "engines": { + "node": ">= 18" } }, "node_modules/@octokit/graphql": { - "version": "4.8.0", - "resolved": "https://registry.npmjs.org/@octokit/graphql/-/graphql-4.8.0.tgz", - "integrity": "sha512-0gv+qLSBLKF0z8TKaSKTsS39scVKF9dbMxJpj3U0vC7wjNWFuIpL/z76Qe2fiuCbDRcJSavkXsVtMS6/dtQQsg==", + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/@octokit/graphql/-/graphql-7.1.0.tgz", + "integrity": "sha512-r+oZUH7aMFui1ypZnAvZmn0KSqAUgE1/tUXIWaqUCa1758ts/Jio84GZuzsvUkme98kv0WFY8//n0J1Z+vsIsQ==", "dev": true, "dependencies": { - "@octokit/request": "^5.6.0", - "@octokit/types": "^6.0.3", + "@octokit/request": "^8.3.0", + "@octokit/types": "^13.0.0", "universal-user-agent": "^6.0.0" + }, + "engines": { + "node": ">= 18" } }, "node_modules/@octokit/openapi-types": { - "version": "12.11.0", - "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-12.11.0.tgz", - "integrity": "sha512-VsXyi8peyRq9PqIz/tpqiL2w3w80OgVMwBHltTml3LmVvXiphgeqmY9mvBw9Wu7e0QWk/fqD37ux8yP5uVekyQ==", + "version": "22.2.0", + "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-22.2.0.tgz", + "integrity": "sha512-QBhVjcUa9W7Wwhm6DBFu6ZZ+1/t/oYxqc2tp81Pi41YNuJinbFRx8B133qVOrAaBbF7D/m0Et6f9/pZt9Rc+tg==", "dev": true }, "node_modules/@octokit/plugin-paginate-rest": { - "version": "2.21.3", - "resolved": "https://registry.npmjs.org/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-2.21.3.tgz", - "integrity": "sha512-aCZTEf0y2h3OLbrgKkrfFdjRL6eSOo8komneVQJnYecAxIej7Bafor2xhuDJOIFau4pk0i/P28/XgtbyPF0ZHw==", + "version": "11.3.1", + "resolved": "https://registry.npmjs.org/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-11.3.1.tgz", + "integrity": "sha512-ryqobs26cLtM1kQxqeZui4v8FeznirUsksiA+RYemMPJ7Micju0WSkv50dBksTuZks9O5cg4wp+t8fZ/cLY56g==", "dev": true, "dependencies": { - "@octokit/types": "^6.40.0" + "@octokit/types": "^13.5.0" + }, + "engines": { + "node": ">= 18" }, "peerDependencies": { - "@octokit/core": ">=2" + "@octokit/core": "5" } }, "node_modules/@octokit/plugin-request-log": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@octokit/plugin-request-log/-/plugin-request-log-1.0.4.tgz", - "integrity": "sha512-mLUsMkgP7K/cnFEw07kWqXGF5LKrOkD+lhCrKvPHXWDywAwuDUeDwWBpc69XK3pNX0uKiVt8g5z96PJ6z9xCFA==", + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/@octokit/plugin-request-log/-/plugin-request-log-4.0.1.tgz", + "integrity": "sha512-GihNqNpGHorUrO7Qa9JbAl0dbLnqJVrV8OXe2Zm5/Y4wFkZQDfTreBzVmiRfJVfE4mClXdihHnbpyyO9FSX4HA==", "dev": true, + "engines": { + "node": ">= 18" + }, "peerDependencies": { - "@octokit/core": ">=3" + "@octokit/core": "5" } }, "node_modules/@octokit/plugin-rest-endpoint-methods": { - "version": "5.16.2", - "resolved": "https://registry.npmjs.org/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-5.16.2.tgz", - "integrity": "sha512-8QFz29Fg5jDuTPXVtey05BLm7OB+M8fnvE64RNegzX7U+5NUXcOcnpTIK0YfSHBg8gYd0oxIq3IZTe9SfPZiRw==", + "version": "13.2.2", + "resolved": "https://registry.npmjs.org/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-13.2.2.tgz", + "integrity": "sha512-EI7kXWidkt3Xlok5uN43suK99VWqc8OaIMktY9d9+RNKl69juoTyxmLoWPIZgJYzi41qj/9zU7G/ljnNOJ5AFA==", "dev": true, "dependencies": { - "@octokit/types": "^6.39.0", - "deprecation": "^2.3.1" + "@octokit/types": "^13.5.0" + }, + "engines": { + "node": ">= 18" }, "peerDependencies": { - "@octokit/core": ">=3" + "@octokit/core": "^5" } }, "node_modules/@octokit/request": { - "version": "5.6.3", - "resolved": "https://registry.npmjs.org/@octokit/request/-/request-5.6.3.tgz", - "integrity": "sha512-bFJl0I1KVc9jYTe9tdGGpAMPy32dLBXXo1dS/YwSCTL/2nd9XeHsY616RE3HPXDVk+a+dBuzyz5YdlXwcDTr2A==", + "version": "8.4.0", + "resolved": "https://registry.npmjs.org/@octokit/request/-/request-8.4.0.tgz", + "integrity": "sha512-9Bb014e+m2TgBeEJGEbdplMVWwPmL1FPtggHQRkV+WVsMggPtEkLKPlcVYm/o8xKLkpJ7B+6N8WfQMtDLX2Dpw==", "dev": true, "dependencies": { - "@octokit/endpoint": "^6.0.1", - "@octokit/request-error": "^2.1.0", - "@octokit/types": "^6.16.1", - "is-plain-object": "^5.0.0", - "node-fetch": "^2.6.7", + "@octokit/endpoint": "^9.0.1", + "@octokit/request-error": "^5.1.0", + "@octokit/types": "^13.1.0", "universal-user-agent": "^6.0.0" + }, + "engines": { + "node": ">= 18" } }, "node_modules/@octokit/request-error": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/@octokit/request-error/-/request-error-2.1.0.tgz", - "integrity": "sha512-1VIvgXxs9WHSjicsRwq8PlR2LR2x6DwsJAaFgzdi0JfJoGSO8mYI/cHJQ+9FbN21aa+DrgNLnwObmyeSC8Rmpg==", + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/@octokit/request-error/-/request-error-5.1.0.tgz", + "integrity": "sha512-GETXfE05J0+7H2STzekpKObFe765O5dlAKUTLNGeH+x47z7JjXHfsHKo5z21D/o/IOZTUEI6nyWyR+bZVP/n5Q==", "dev": true, "dependencies": { - "@octokit/types": "^6.0.3", + "@octokit/types": "^13.1.0", "deprecation": "^2.0.0", "once": "^1.4.0" + }, + "engines": { + "node": ">= 18" } }, "node_modules/@octokit/rest": { - "version": "18.12.0", - "resolved": "https://registry.npmjs.org/@octokit/rest/-/rest-18.12.0.tgz", - "integrity": "sha512-gDPiOHlyGavxr72y0guQEhLsemgVjwRePayJ+FcKc2SJqKUbxbkvf5kAZEWA/MKvsfYlQAMVzNJE3ezQcxMJ2Q==", + "version": "20.1.1", + "resolved": "https://registry.npmjs.org/@octokit/rest/-/rest-20.1.1.tgz", + "integrity": "sha512-MB4AYDsM5jhIHro/dq4ix1iWTLGToIGk6cWF5L6vanFaMble5jTX/UBQyiv05HsWnwUtY8JrfHy2LWfKwihqMw==", "dev": true, "dependencies": { - "@octokit/core": "^3.5.1", - "@octokit/plugin-paginate-rest": "^2.16.8", - "@octokit/plugin-request-log": "^1.0.4", - "@octokit/plugin-rest-endpoint-methods": "^5.12.0" + "@octokit/core": "^5.0.2", + "@octokit/plugin-paginate-rest": "11.3.1", + "@octokit/plugin-request-log": "^4.0.0", + "@octokit/plugin-rest-endpoint-methods": "13.2.2" + }, + "engines": { + "node": ">= 18" } }, "node_modules/@octokit/types": { - "version": "6.41.0", - "resolved": "https://registry.npmjs.org/@octokit/types/-/types-6.41.0.tgz", - "integrity": "sha512-eJ2jbzjdijiL3B4PrSQaSjuF2sPEQPVCPzBvTHJD9Nz+9dw2SGH4K4xeQJ77YfTq5bRQ+bD8wT11JbeDPmxmGg==", + "version": "13.5.0", + "resolved": "https://registry.npmjs.org/@octokit/types/-/types-13.5.0.tgz", + "integrity": "sha512-HdqWTf5Z3qwDVlzCrP8UJquMwunpDiMPt5er+QjGzL4hqr/vBVY/MauQgS1xWxCDT1oMx1EULyqxncdCY/NVSQ==", "dev": true, "dependencies": { - "@octokit/openapi-types": "^12.11.0" + "@octokit/openapi-types": "^22.2.0" } }, "node_modules/@ungap/promise-all-settled": { @@ -164,6 +206,17 @@ "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz", "integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==" }, + "node_modules/agent-base": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", + "integrity": "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==", + "dependencies": { + "debug": "4" + }, + "engines": { + "node": ">= 6.0.0" + } + }, "node_modules/ansi-colors": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz", @@ -210,17 +263,21 @@ } }, "node_modules/aproba": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/aproba/-/aproba-1.2.0.tgz", - "integrity": "sha512-Y9J6ZjXtoYh8RnXVCMOU/ttDmk1aBjunq9vO0ta5x85WDQiQfUF9sIPBITdbiiIVcBo03Hi3jMxigBtsddlXRw==" + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/aproba/-/aproba-2.0.0.tgz", + "integrity": "sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==" }, "node_modules/are-we-there-yet": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/are-we-there-yet/-/are-we-there-yet-1.1.7.tgz", - "integrity": "sha512-nxwy40TuMiUGqMyRHgCSWZ9FM4VAoRP4xUYSTv5ImRog+h9yISPbVH7H8fASCIzYn9wlEv4zvFL7uKDMCFQm3g==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/are-we-there-yet/-/are-we-there-yet-2.0.0.tgz", + "integrity": "sha512-Ci/qENmwHnsYo9xKIcUJN5LeDKdJ6R1Z1j9V/J5wyq8nh/mYPEpIKJbBZXtZjG04HiK7zV/p6Vs9952MrMeUIw==", + "deprecated": "This package is no longer supported.", "dependencies": { "delegates": "^1.0.0", - "readable-stream": "^2.0.6" + "readable-stream": "^3.6.0" + }, + "engines": { + "node": ">=10" } }, "node_modules/argparse": { @@ -241,12 +298,15 @@ "dev": true }, "node_modules/binary-extensions": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz", - "integrity": "sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==", + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", + "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==", "dev": true, "engines": { "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" } }, "node_modules/brace-expansion": { @@ -259,12 +319,12 @@ } }, "node_modules/braces": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", - "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", "dev": true, "dependencies": { - "fill-range": "^7.0.1" + "fill-range": "^7.1.1" }, "engines": { "node": ">=8" @@ -317,30 +377,39 @@ } }, "node_modules/chokidar": { - "version": "3.5.1", - "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.1.tgz", - "integrity": "sha512-9+s+Od+W0VJJzawDma/gvBNQqkTiqYTWLuZoyAsivsI4AaWTCzHG06/TMjsf1cYe9Cb97UCEhjz7HvnPk2p/tw==", + "version": "3.5.3", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.3.tgz", + "integrity": "sha512-Dr3sfKRP6oTcjf2JmUmFJfeVMvXBdegxB0iVQ5eb2V10uFJUCAS8OByZdVAyVb8xXNz3GjjTgj9kLWsZTqE6kw==", "dev": true, + "funding": [ + { + "type": "individual", + "url": "https://paulmillr.com/funding/" + } + ], "dependencies": { - "anymatch": "~3.1.1", + "anymatch": "~3.1.2", "braces": "~3.0.2", - "glob-parent": "~5.1.0", + "glob-parent": "~5.1.2", "is-binary-path": "~2.1.0", "is-glob": "~4.0.1", "normalize-path": "~3.0.0", - "readdirp": "~3.5.0" + "readdirp": "~3.6.0" }, "engines": { "node": ">= 8.10.0" }, "optionalDependencies": { - "fsevents": "~2.3.1" + "fsevents": "~2.3.2" } }, "node_modules/chownr": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", - "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==" + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-2.0.0.tgz", + "integrity": "sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==", + "engines": { + "node": ">=10" + } }, "node_modules/cliui": { "version": "7.0.4", @@ -423,13 +492,21 @@ "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", "dev": true }, + "node_modules/color-support": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-support/-/color-support-1.1.3.tgz", + "integrity": "sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg==", + "bin": { + "color-support": "bin.js" + } + }, "node_modules/commander": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-7.2.0.tgz", - "integrity": "sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw==", + "version": "12.0.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-12.0.0.tgz", + "integrity": "sha512-MwVNWlYjDTtOjX5PiD7o5pK0UrFU/OYgcJfjjK4RaHZETNtjJqrZa9Y9ds88+A+f+d5lv+561eZ+yCKoS3gbAA==", "dev": true, "engines": { - "node": ">= 10" + "node": ">=18" } }, "node_modules/concat-map": { @@ -442,16 +519,10 @@ "resolved": "https://registry.npmjs.org/console-control-strings/-/console-control-strings-1.1.0.tgz", "integrity": "sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ==" }, - "node_modules/core-util-is": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", - "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==" - }, "node_modules/debug": { - "version": "4.3.1", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.1.tgz", - "integrity": "sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==", - "dev": true, + "version": "4.3.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.3.tgz", + "integrity": "sha512-/zxw5+vh1Tfv+4Qn7a5nsbcJKPaSvCDhojn6FEl9vupwK2VCSDtEiEtqr8DFtzYFOdz63LBkxec7DYuc2jon6Q==", "dependencies": { "ms": "2.1.2" }, @@ -467,8 +538,7 @@ "node_modules/debug/node_modules/ms": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", - "dev": true + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, "node_modules/decamelize": { "version": "4.0.0", @@ -482,14 +552,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/deep-extend": { - "version": "0.6.0", - "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", - "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==", - "engines": { - "node": ">=4.0.0" - } - }, "node_modules/delegates": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/delegates/-/delegates-1.0.0.tgz", @@ -502,14 +564,11 @@ "dev": true }, "node_modules/detect-libc": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-1.0.3.tgz", - "integrity": "sha512-pGjwhsmsp4kL2RTz08wcOlGN83otlqHeD/Z5T8GXZB+/YcpQ/dgo+lbU8ZsGxV0HIvqqxo9l7mqYwyYMD9bKDg==", - "bin": { - "detect-libc": "bin/detect-libc.js" - }, + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.3.tgz", + "integrity": "sha512-bwy0MGW55bG41VqxxypOsdSdGqLwXPI/focwgTYCFMbdUiBAxLg9CFzG08sz2aqzknwiX7Hkl0bQENjg8iLByw==", "engines": { - "node": ">=0.10" + "node": ">=8" } }, "node_modules/diff": { @@ -524,8 +583,7 @@ "node_modules/emoji-regex": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" }, "node_modules/escalade": { "version": "3.1.1", @@ -549,9 +607,9 @@ } }, "node_modules/fill-range": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", - "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", "dev": true, "dependencies": { "to-regex-range": "^5.0.1" @@ -586,11 +644,25 @@ } }, "node_modules/fs-minipass": { - "version": "1.2.7", - "resolved": "https://registry.npmjs.org/fs-minipass/-/fs-minipass-1.2.7.tgz", - "integrity": "sha512-GWSSJGFy4e9GUeCcbIkED+bgAoFyj7XF1mV8rma3QW4NIqX9Kyx79N/PF61H5udOV3aY1IaMLs6pGbH71nlCTA==", + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fs-minipass/-/fs-minipass-2.1.0.tgz", + "integrity": "sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==", + "dependencies": { + "minipass": "^3.0.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/fs-minipass/node_modules/minipass": { + "version": "3.3.6", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-3.3.6.tgz", + "integrity": "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==", "dependencies": { - "minipass": "^2.6.0" + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" } }, "node_modules/fs.realpath": { @@ -613,18 +685,63 @@ } }, "node_modules/gauge": { - "version": "2.7.4", - "resolved": "https://registry.npmjs.org/gauge/-/gauge-2.7.4.tgz", - "integrity": "sha512-14x4kjc6lkD3ltw589k0NrPD6cCNTD6CWoVUNpB85+DrtONoZn+Rug6xZU5RvSC4+TZPxA5AnBibQYAvZn41Hg==", + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/gauge/-/gauge-3.0.2.tgz", + "integrity": "sha512-+5J6MS/5XksCuXq++uFRsnUd7Ovu1XenbeuIuNRJxYWjgQbPuFhT14lAvsWfqfAmnwluf1OwMjz39HjfLPci0Q==", + "deprecated": "This package is no longer supported.", "dependencies": { - "aproba": "^1.0.3", + "aproba": "^1.0.3 || ^2.0.0", + "color-support": "^1.1.2", "console-control-strings": "^1.0.0", - "has-unicode": "^2.0.0", - "object-assign": "^4.1.0", + "has-unicode": "^2.0.1", + "object-assign": "^4.1.1", "signal-exit": "^3.0.0", - "string-width": "^1.0.1", - "strip-ansi": "^3.0.1", - "wide-align": "^1.1.0" + "string-width": "^4.2.3", + "strip-ansi": "^6.0.1", + "wide-align": "^1.1.2" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/gauge/node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "engines": { + "node": ">=8" + } + }, + "node_modules/gauge/node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "engines": { + "node": ">=8" + } + }, + "node_modules/gauge/node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/gauge/node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" } }, "node_modules/get-caller-file": { @@ -637,9 +754,10 @@ } }, "node_modules/glob": { - "version": "7.1.6", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz", - "integrity": "sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==", + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.0.tgz", + "integrity": "sha512-lmLf6gtyrPq8tTjSmrO94wBeQbFR3HbLHbuyD69wuyQkImp2hWqMGB47OX65FBkPffO641IP9jWa1z4ivqG26Q==", + "deprecated": "Glob versions prior to v9 are no longer supported", "dependencies": { "fs.realpath": "^1.0.0", "inflight": "^1.0.4", @@ -699,23 +817,16 @@ "he": "bin/he" } }, - "node_modules/iconv-lite": { - "version": "0.4.24", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", - "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "node_modules/https-proxy-agent": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", + "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", "dependencies": { - "safer-buffer": ">= 2.1.2 < 3" + "agent-base": "6", + "debug": "4" }, "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/ignore-walk": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/ignore-walk/-/ignore-walk-3.0.4.tgz", - "integrity": "sha512-PY6Ii8o1jMRA1z4F2hRkH/xN59ox43DavKvD3oDpfurRlOJyAHpifIwpbdv1n4jt4ov0jSpw3kQ4GhJnpBL6WQ==", - "dependencies": { - "minimatch": "^3.0.4" + "node": ">= 6" } }, "node_modules/inflight": { @@ -732,11 +843,6 @@ "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" }, - "node_modules/ini": { - "version": "1.3.8", - "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", - "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==" - }, "node_modules/is-binary-path": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", @@ -799,20 +905,18 @@ "node": ">=8" } }, - "node_modules/is-plain-object": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-5.0.0.tgz", - "integrity": "sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==", + "node_modules/is-unicode-supported": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-0.1.0.tgz", + "integrity": "sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw==", "dev": true, "engines": { - "node": ">=0.10.0" + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/isarray": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", - "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==" - }, "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", @@ -820,9 +924,9 @@ "dev": true }, "node_modules/js-yaml": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.0.0.tgz", - "integrity": "sha512-pqon0s+4ScYUvX30wxQi3PogGFAlUyH0awepWvwkj4jD4v+ova3RiYw8bmA6x2rDrEaj8i/oWKoRxpVNW+Re8Q==", + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", + "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", "dev": true, "dependencies": { "argparse": "^2.0.1" @@ -847,21 +951,47 @@ } }, "node_modules/log-symbols": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-4.0.0.tgz", - "integrity": "sha512-FN8JBzLx6CzeMrB0tg6pqlGU1wCrXW+ZXGH481kfsBqer0hToTIiHdjH4Mq8xJUbvATujKCvaREGWpGUionraA==", + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-4.1.0.tgz", + "integrity": "sha512-8XPvpAA8uyhfteu8pIvQxpJZ7SYYdpUivZpGy6sFsBuKRY/7rQGavedeB8aK+Zkyq6upMFVL/9AW6vOYzfRyLg==", "dev": true, "dependencies": { - "chalk": "^4.0.0" + "chalk": "^4.1.0", + "is-unicode-supported": "^0.1.0" }, "engines": { "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/make-dir": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/make-dir/-/make-dir-3.1.0.tgz", + "integrity": "sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==", + "dependencies": { + "semver": "^6.0.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/make-dir/node_modules/semver": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "bin": { + "semver": "bin/semver.js" } }, "node_modules/minimatch": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", - "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", "dependencies": { "brace-expansion": "^1.1.7" }, @@ -869,70 +999,75 @@ "node": "*" } }, - "node_modules/minimist": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", - "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, "node_modules/minipass": { - "version": "2.9.0", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-2.9.0.tgz", - "integrity": "sha512-wxfUjg9WebH+CUDX/CdbRlh5SmfZiy/hpkxaRI16Y9W56Pa75sWgd/rvFilSgrauD9NyFymP/+JFV3KwzIsJeg==", - "dependencies": { - "safe-buffer": "^5.1.2", - "yallist": "^3.0.0" + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-5.0.0.tgz", + "integrity": "sha512-3FnjYuehv9k6ovOEbyOswadCDPX1piCfhV8ncmYtHOjuPwylVWsghTLo7rabjC3Rx5xD4HDx8Wm1xnMF7S5qFQ==", + "engines": { + "node": ">=8" } }, "node_modules/minizlib": { - "version": "1.3.3", - "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-1.3.3.tgz", - "integrity": "sha512-6ZYMOEnmVsdCeTJVE0W9ZD+pVnE8h9Hma/iOwwRDsdQoePpoX56/8B6z3P9VNwppJuBKNRuFDRNRqRWexT9G9Q==", + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-2.1.2.tgz", + "integrity": "sha512-bAxsR8BVfj60DWXHE3u30oHzfl4G7khkSuPW+qvpd7jFRHm7dLxOjUk1EHACJ/hxLY8phGJ0YhYHZo7jil7Qdg==", "dependencies": { - "minipass": "^2.9.0" + "minipass": "^3.0.0", + "yallist": "^4.0.0" + }, + "engines": { + "node": ">= 8" } }, - "node_modules/mkdirp": { - "version": "0.5.6", - "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz", - "integrity": "sha512-FP+p8RB8OWpF3YZBCrP5gtADmtXApB5AMLn+vdyA+PyxCjrCs00mjyUozssO33cwDeT3wNGdLxJ5M//YqtHAJw==", + "node_modules/minizlib/node_modules/minipass": { + "version": "3.3.6", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-3.3.6.tgz", + "integrity": "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==", "dependencies": { - "minimist": "^1.2.6" + "yallist": "^4.0.0" }, + "engines": { + "node": ">=8" + } + }, + "node_modules/mkdirp": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-1.0.4.tgz", + "integrity": "sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw==", "bin": { "mkdirp": "bin/cmd.js" + }, + "engines": { + "node": ">=10" } }, "node_modules/mocha": { - "version": "8.4.0", - "resolved": "https://registry.npmjs.org/mocha/-/mocha-8.4.0.tgz", - "integrity": "sha512-hJaO0mwDXmZS4ghXsvPVriOhsxQ7ofcpQdm8dE+jISUOKopitvnXFQmpRR7jd2K6VBG6E26gU3IAbXXGIbu4sQ==", + "version": "9.2.2", + "resolved": "https://registry.npmjs.org/mocha/-/mocha-9.2.2.tgz", + "integrity": "sha512-L6XC3EdwT6YrIk0yXpavvLkn8h+EU+Y5UcCHKECyMbdUIxyMuZj4bX4U9e1nvnvUUvQVsV2VHQr5zLdcUkhW/g==", "dev": true, "dependencies": { "@ungap/promise-all-settled": "1.1.2", "ansi-colors": "4.1.1", "browser-stdout": "1.3.1", - "chokidar": "3.5.1", - "debug": "4.3.1", + "chokidar": "3.5.3", + "debug": "4.3.3", "diff": "5.0.0", "escape-string-regexp": "4.0.0", "find-up": "5.0.0", - "glob": "7.1.6", + "glob": "7.2.0", "growl": "1.10.5", "he": "1.2.0", - "js-yaml": "4.0.0", - "log-symbols": "4.0.0", - "minimatch": "3.0.4", + "js-yaml": "4.1.0", + "log-symbols": "4.1.0", + "minimatch": "4.2.1", "ms": "2.1.3", - "nanoid": "3.1.20", - "serialize-javascript": "5.0.1", + "nanoid": "3.3.1", + "serialize-javascript": "6.0.0", "strip-json-comments": "3.1.1", "supports-color": "8.1.1", "which": "2.0.2", - "wide-align": "1.1.3", - "workerpool": "6.1.0", + "workerpool": "6.2.0", "yargs": "16.2.0", "yargs-parser": "20.2.4", "yargs-unparser": "2.0.0" @@ -942,27 +1077,40 @@ "mocha": "bin/mocha" }, "engines": { - "node": ">= 10.12.0" + "node": ">= 12.0.0" }, "funding": { "type": "opencollective", "url": "https://opencollective.com/mochajs" } }, + "node_modules/mocha/node_modules/minimatch": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-4.2.1.tgz", + "integrity": "sha512-9Uq1ChtSZO+Mxa/CL1eGizn2vRn3MlLgzhT0Iz8zaY8NdvxvB0d5QdPFmCKf7JKA9Lerx5vRrnwO03jsSfGG9g==", + "dev": true, + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true }, "node_modules/nan": { - "version": "2.18.0", - "resolved": "https://registry.npmjs.org/nan/-/nan-2.18.0.tgz", - "integrity": "sha512-W7tfG7vMOGtD30sHoZSSc/JVYiyDPEyQVso/Zz+/uQd0B0L46gtC+pHha5FFMRpil6fm/AoEcRWyOVi4+E/f8w==" + "version": "2.20.0", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.20.0.tgz", + "integrity": "sha512-bk3gXBZDGILuuo/6sKtr0DQmSThYHLtNCdSdXk9YkxD/jK6X2vmCyyXBBxyqZ4XcnzTyYEAThfX3DCEnLf6igw==" }, "node_modules/nanoid": { - "version": "3.1.20", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.1.20.tgz", - "integrity": "sha512-a1cQNyczgKbLX9jwbS/+d7W8fX/RfgYR7lVWwWOGIPNgK2m0MWvrGF6/m4kk6U3QcFMnZf3RIhL0v2Jgh/0Uxw==", + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.1.tgz", + "integrity": "sha512-n6Vs/3KGyxPQd6uO0eH4Bv0ojGSUvuLlIHtC3Y0kEO23YRge8H9x1GCzLn28YX0H66pMkxuaeESFq4tKISKwdw==", "dev": true, "bin": { "nanoid": "bin/nanoid.cjs" @@ -971,35 +1119,10 @@ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" } }, - "node_modules/needle": { - "version": "2.9.1", - "resolved": "https://registry.npmjs.org/needle/-/needle-2.9.1.tgz", - "integrity": "sha512-6R9fqJ5Zcmf+uYaFgdIHmLwNldn5HbK8L5ybn7Uz+ylX/rnOsSp1AHcvQSrCaFN+qNM1wpymHqD7mVasEOlHGQ==", - "dependencies": { - "debug": "^3.2.6", - "iconv-lite": "^0.4.4", - "sax": "^1.2.4" - }, - "bin": { - "needle": "bin/needle" - }, - "engines": { - "node": ">= 4.4.x" - } - }, - "node_modules/needle/node_modules/debug": { - "version": "3.2.7", - "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", - "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", - "dependencies": { - "ms": "^2.1.1" - } - }, "node_modules/node-fetch": { "version": "2.7.0", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", - "dev": true, "dependencies": { "whatwg-url": "^5.0.0" }, @@ -1015,50 +1138,31 @@ } } }, - "node_modules/node-pre-gyp": { - "version": "0.14.0", - "resolved": "https://registry.npmjs.org/node-pre-gyp/-/node-pre-gyp-0.14.0.tgz", - "integrity": "sha512-+CvDC7ZttU/sSt9rFjix/P05iS43qHCOOGzcr3Ry99bXG7VX953+vFyEuph/tfqoYu8dttBkE86JSKBO2OzcxA==", - "deprecated": "Please upgrade to @mapbox/node-pre-gyp: the non-scoped node-pre-gyp package is deprecated and only the @mapbox scoped package will recieve updates in the future", - "dependencies": { - "detect-libc": "^1.0.2", - "mkdirp": "^0.5.1", - "needle": "^2.2.1", - "nopt": "^4.0.1", - "npm-packlist": "^1.1.6", - "npmlog": "^4.0.2", - "rc": "^1.2.7", - "rimraf": "^2.6.1", - "semver": "^5.3.0", - "tar": "^4.4.2" - }, - "bin": { - "node-pre-gyp": "bin/node-pre-gyp" - } - }, "node_modules/node-pre-gyp-github": { - "version": "1.4.4", - "resolved": "https://registry.npmjs.org/node-pre-gyp-github/-/node-pre-gyp-github-1.4.4.tgz", - "integrity": "sha512-oE9JD1aXRi4+1jSH7Q+ybEhfujW5bJ66n4YMGpaUp/k2/X/8i09ouK1seznf3wOagcKjytRJCkf71DdEJx2zhA==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/node-pre-gyp-github/-/node-pre-gyp-github-2.0.0.tgz", + "integrity": "sha512-cTc6dRK3Sa4z+Z/1QAtAQErnaePir83ZveUz80VLK11zeYokt4cNJ6Aexoi58cSJ7xGvgZJImUeD1vIl2e7pjQ==", "dev": true, "dependencies": { - "@octokit/rest": "18.12.0", - "commander": "7.2.0" + "@octokit/rest": "20.1.1", + "commander": "12.0.0" }, "bin": { "node-pre-gyp-github": "bin/node-pre-gyp-github.js" } }, "node_modules/nopt": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/nopt/-/nopt-4.0.3.tgz", - "integrity": "sha512-CvaGwVMztSMJLOeXPrez7fyfObdZqNUK1cPAEzLHrTybIua9pMdmmPR5YwtfNftIOMv3DPUhFaxsZMNTQO20Kg==", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/nopt/-/nopt-5.0.0.tgz", + "integrity": "sha512-Tbj67rffqceeLpcRXrT7vKAN8CwfPeIBgM7E6iBkmKLV7bEMwpGgYLGv0jACUsECaa/vuxP0IjEont6umdMgtQ==", "dependencies": { - "abbrev": "1", - "osenv": "^0.1.4" + "abbrev": "1" }, "bin": { "nopt": "bin/nopt.js" + }, + "engines": { + "node": ">=6" } }, "node_modules/normalize-path": { @@ -1070,38 +1174,16 @@ "node": ">=0.10.0" } }, - "node_modules/npm-bundled": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/npm-bundled/-/npm-bundled-1.1.2.tgz", - "integrity": "sha512-x5DHup0SuyQcmL3s7Rx/YQ8sbw/Hzg0rj48eN0dV7hf5cmQq5PXIeioroH3raV1QC1yh3uTYuMThvEQF3iKgGQ==", - "dependencies": { - "npm-normalize-package-bin": "^1.0.1" - } - }, - "node_modules/npm-normalize-package-bin": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/npm-normalize-package-bin/-/npm-normalize-package-bin-1.0.1.tgz", - "integrity": "sha512-EPfafl6JL5/rU+ot6P3gRSCpPDW5VmIzX959Ob1+ySFUuuYHWHekXpwdUZcKP5C+DS4GEtdJluwBjnsNDl+fSA==" - }, - "node_modules/npm-packlist": { - "version": "1.4.8", - "resolved": "https://registry.npmjs.org/npm-packlist/-/npm-packlist-1.4.8.tgz", - "integrity": "sha512-5+AZgwru5IevF5ZdnFglB5wNlHG1AOOuw28WhUq8/8emhBmLv6jX5by4WJCh7lW0uSYZYS6DXqIsyZVIXRZU9A==", - "dependencies": { - "ignore-walk": "^3.0.1", - "npm-bundled": "^1.0.1", - "npm-normalize-package-bin": "^1.0.1" - } - }, "node_modules/npmlog": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/npmlog/-/npmlog-4.1.2.tgz", - "integrity": "sha512-2uUqazuKlTaSI/dC8AzicUck7+IrEaOnN/e0jd3Xtt1KcGpwx30v50mL7oPyr/h9bL3E4aZccVwpwP+5W9Vjkg==", + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/npmlog/-/npmlog-5.0.1.tgz", + "integrity": "sha512-AqZtDUWOMKs1G/8lwylVjrdYgqA4d9nu8hc+0gzRxlDb1I10+FHBGMXs6aiQHFdCUUlqH99MUMuLfzWDNDtfxw==", + "deprecated": "This package is no longer supported.", "dependencies": { - "are-we-there-yet": "~1.1.2", - "console-control-strings": "~1.1.0", - "gauge": "~2.7.3", - "set-blocking": "~2.0.0" + "are-we-there-yet": "^2.0.0", + "console-control-strings": "^1.1.0", + "gauge": "^3.0.0", + "set-blocking": "^2.0.0" } }, "node_modules/number-is-nan": { @@ -1128,31 +1210,6 @@ "wrappy": "1" } }, - "node_modules/os-homedir": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/os-homedir/-/os-homedir-1.0.2.tgz", - "integrity": "sha512-B5JU3cabzk8c67mRRd3ECmROafjYMXbuzlwtqdM8IbS8ktlTix8aFGb2bAGKrSRIlnfKwovGUUr72JUPyOb6kQ==", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/os-tmpdir": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", - "integrity": "sha512-D2FR03Vir7FIu45XBY20mTb+/ZSWB00sjU9jdQXt83gDrI4Ztz5Fs7/yy74g2N5SVQY4xY1qDr4rNddwYRVX0g==", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/osenv": { - "version": "0.1.5", - "resolved": "https://registry.npmjs.org/osenv/-/osenv-0.1.5.tgz", - "integrity": "sha512-0CWcCECdMVc2Rw3U5w9ZjqX6ga6ubk1xDVKxtBQPK7wis/0F2r9T6k4ydGYhecl7YUBxBVxhL5oisPsNxAPe2g==", - "dependencies": { - "os-homedir": "^1.0.0", - "os-tmpdir": "^1.0.0" - } - }, "node_modules/p-limit": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", @@ -1212,11 +1269,6 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, - "node_modules/process-nextick-args": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", - "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==" - }, "node_modules/randombytes": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz", @@ -1226,46 +1278,23 @@ "safe-buffer": "^5.1.0" } }, - "node_modules/rc": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", - "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==", + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", "dependencies": { - "deep-extend": "^0.6.0", - "ini": "~1.3.0", - "minimist": "^1.2.0", - "strip-json-comments": "~2.0.1" + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" }, - "bin": { - "rc": "cli.js" - } - }, - "node_modules/rc/node_modules/strip-json-comments": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", - "integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==", "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/readable-stream": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", - "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", - "dependencies": { - "core-util-is": "~1.0.0", - "inherits": "~2.0.3", - "isarray": "~1.0.0", - "process-nextick-args": "~2.0.0", - "safe-buffer": "~5.1.1", - "string_decoder": "~1.1.1", - "util-deprecate": "~1.0.1" + "node": ">= 6" } }, "node_modules/readdirp": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.5.0.tgz", - "integrity": "sha512-cMhu7c/8rdhkHXWsY+osBhfSy0JikwpHK/5+imo+LpeasTF8ouErHrlYkwT0++njiyuDvc7OFY5T3ukvZ8qmFQ==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", + "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", "dev": true, "dependencies": { "picomatch": "^2.2.1" @@ -1284,43 +1313,54 @@ } }, "node_modules/rimraf": { - "version": "2.7.1", - "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.7.1.tgz", - "integrity": "sha512-uWjbaKIK3T1OSVptzX7Nl6PvQ3qAGtKEtVRjRuazjfL3Bx5eI409VZSqgND+4UNnmzLVdPj9FqFJNPqBZFve4w==", + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", + "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", + "deprecated": "Rimraf versions prior to v4 are no longer supported", "dependencies": { "glob": "^7.1.3" }, "bin": { "rimraf": "bin.js" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" } }, "node_modules/safe-buffer": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", - "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" - }, - "node_modules/safer-buffer": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", - "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" - }, - "node_modules/sax": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/sax/-/sax-1.3.0.tgz", - "integrity": "sha512-0s+oAmw9zLl1V1cS9BtZN7JAd0cW5e0QH4W3LWEK6a4LaLEA2OTpGYWDY+6XasBLtz6wkm3u1xRw95mRuJ59WA==" + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] }, "node_modules/semver": { - "version": "5.7.2", - "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.2.tgz", - "integrity": "sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g==", + "version": "7.6.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz", + "integrity": "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==", "bin": { - "semver": "bin/semver" + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" } }, "node_modules/serialize-javascript": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-5.0.1.tgz", - "integrity": "sha512-SaaNal9imEO737H2c05Og0/8LUXG7EnsZyMa8MzkmuHoELfT6txuj0cMqRj6zfPKnmQ1yasR4PCJc8x+M4JSPA==", + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.0.tgz", + "integrity": "sha512-Qr3TosvguFt8ePWqsvRfrKyQXIiW+nGbYpy8XK24NQHE83caxWt+mIymTT19DGFbNWNLfEwsrkSmN64lVWB9ag==", "dev": true, "dependencies": { "randombytes": "^2.1.0" @@ -1337,11 +1377,11 @@ "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==" }, "node_modules/string_decoder": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", - "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", "dependencies": { - "safe-buffer": "~5.1.0" + "safe-buffer": "~5.2.0" } }, "node_modules/string-width": { @@ -1396,41 +1436,21 @@ } }, "node_modules/tar": { - "version": "4.4.19", - "resolved": "https://registry.npmjs.org/tar/-/tar-4.4.19.tgz", - "integrity": "sha512-a20gEsvHnWe0ygBY8JbxoM4w3SJdhc7ZAuxkLqh+nvNQN2IOt0B5lLgM490X5Hl8FF0dl0tOf2ewFYAlIFgzVA==", + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/tar/-/tar-6.2.1.tgz", + "integrity": "sha512-DZ4yORTwrbTj/7MZYq2w+/ZFdI6OZ/f9SFHR+71gIVUZhOQPHzVCLpvRnPgyaMpfWxxk/4ONva3GQSyNIKRv6A==", "dependencies": { - "chownr": "^1.1.4", - "fs-minipass": "^1.2.7", - "minipass": "^2.9.0", - "minizlib": "^1.3.3", - "mkdirp": "^0.5.5", - "safe-buffer": "^5.2.1", - "yallist": "^3.1.1" + "chownr": "^2.0.0", + "fs-minipass": "^2.0.0", + "minipass": "^5.0.0", + "minizlib": "^2.1.1", + "mkdirp": "^1.0.3", + "yallist": "^4.0.0" }, "engines": { - "node": ">=4.5" + "node": ">=10" } }, - "node_modules/tar/node_modules/safe-buffer": { - "version": "5.2.1", - "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", - "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ] - }, "node_modules/to-regex-range": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", @@ -1446,13 +1466,12 @@ "node_modules/tr46": { "version": "0.0.3", "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", - "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", - "dev": true + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" }, "node_modules/universal-user-agent": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/universal-user-agent/-/universal-user-agent-6.0.0.tgz", - "integrity": "sha512-isyNax3wXoKaulPDZWHQqbmIx1k2tb9fb3GGDBRxCscfYV2Ch7WxPArBsFEG8s/safwXTT7H4QGhaIkTp9447w==", + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/universal-user-agent/-/universal-user-agent-6.0.1.tgz", + "integrity": "sha512-yCzhz6FN2wU1NiiQRogkTQszlQSlpWaw8SvVegAc+bDxbzHgh1vX8uIe8OYyMH6DwH+sdTJsgMl36+mSMdRJIQ==", "dev": true }, "node_modules/util-deprecate": { @@ -1463,14 +1482,12 @@ "node_modules/webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", - "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", - "dev": true + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" }, "node_modules/whatwg-url": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", - "dev": true, "dependencies": { "tr46": "~0.0.3", "webidl-conversions": "^3.0.0" @@ -1500,9 +1517,9 @@ } }, "node_modules/workerpool": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.1.0.tgz", - "integrity": "sha512-toV7q9rWNYha963Pl/qyeZ6wG+3nnsyvolaNUS8+R5Wtw6qJPTxIlOP1ZSvcGhEJw+l3HMMmtiNo9Gl61G4GVg==", + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.2.0.tgz", + "integrity": "sha512-Rsk5qQHJ9eowMH28Jwhe8HEbmdYDX4lwoMWshiCXugjtHqMD9ZbiqSDLxcsfdqsETPzVUtX5s1Z5kStiIM6l4A==", "dev": true }, "node_modules/wrap-ansi": { @@ -1581,9 +1598,9 @@ } }, "node_modules/yallist": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", - "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==" + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==" }, "node_modules/yargs": { "version": "16.2.0", diff --git a/package.json b/package.json index ab73d5b..9f0f80d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "opencc", - "version": "1.1.7", + "version": "1.1.8", "description": "Conversion between Traditional and Simplified Chinese", "author": "Carbo Kuo ", "license": "Apache-2.0", @@ -37,11 +37,11 @@ "Traditional Chinese" ], "devDependencies": { - "mocha": "^8.3.0", - "node-pre-gyp-github": "^1.4.3" + "mocha": "^9.2.1", + "node-pre-gyp-github": "^2.0.0" }, "dependencies": { - "nan": "^2.14.2", - "node-pre-gyp": "^0.14.0" + "@mapbox/node-pre-gyp": "^1.0.11", + "nan": "^2.20.0" } -} +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9ec751f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools>=61", "wheel", "cmake"] +build-backend = "setuptools.build_meta" diff --git a/python/opencc/.gitignore b/python/opencc/.gitignore index 8840e76..a1374b4 100644 --- a/python/opencc/.gitignore +++ b/python/opencc/.gitignore @@ -1,2 +1,3 @@ version.py clib/ +!clib/__init__.py diff --git a/python/opencc/clib/__init__.py b/python/opencc/clib/__init__.py index 8b13789..e69de29 100644 --- a/python/opencc/clib/__init__.py +++ b/python/opencc/clib/__init__.py @@ -1 +0,0 @@ - diff --git a/release-pypi-linux.sh b/release-pypi-linux.sh index b0a11bc..d5391db 100644 --- a/release-pypi-linux.sh +++ b/release-pypi-linux.sh @@ -33,13 +33,13 @@ for VERSION in 3.8 3.9 3.10 3.11 3.12; do conda activate py$VERSION # Build and package - pip install --no-cache-dir setuptools wheel cmake - python setup.py build_ext bdist_wheel \ - --plat-name manylinux1_x86_64 + pip install --no-cache-dir build + python -m build \ + -C--plat-name="manylinux2014_$(uname --machine)" # Cleanup conda deactivate - rm -rf build python/opencc/clib OpenCC.egg-info + rm -rf build OpenCC.egg-info done if [ "$1" != "testonly" ]; then diff --git a/release-pypi-macos.sh b/release-pypi-macos.sh index 7b3c632..909bea7 100644 --- a/release-pypi-macos.sh +++ b/release-pypi-macos.sh @@ -20,12 +20,12 @@ for VERSION in 3.8 3.9 3.10 3.11 3.12; do conda activate py$VERSION # Build and package - pip install --no-cache-dir setuptools wheel - python setup.py build_ext bdist_wheel + pip install --no-cache-dir build + python -m build --wheel # Cleanup conda deactivate - rm -rf build python/opencc/clib OpenCC.egg-info + rm -rf build OpenCC.egg-info done if [ "$1" != "testonly" ]; then diff --git a/release-pypi-windows.cmd b/release-pypi-windows.cmd index 21b9e96..7056d9d 100644 --- a/release-pypi-windows.cmd +++ b/release-pypi-windows.cmd @@ -17,16 +17,16 @@ for %%v in (%VERSIONS%) do ( if !ERRORLEVEL! NEQ 0 (EXIT !ERRORLEVEL!) CALL C:\Miniconda/condabin/conda.bat activate py%%v if !ERRORLEVEL! NEQ 0 (EXIT !ERRORLEVEL!) - pip install --no-cache-dir setuptools wheel pytest + pip install --no-cache-dir build if !ERRORLEVEL! NEQ 0 (EXIT !ERRORLEVEL!) REM Build and package - python setup.py build_ext bdist_wheel + python -m build --wheel if !ERRORLEVEL! NEQ 0 (EXIT !ERRORLEVEL!) REM Cleanup CALL C:\Miniconda/condabin/conda.bat deactivate - rmdir /S /Q build python\opencc\clib OpenCC.egg-info + rmdir /S /Q build OpenCC.egg-info ) if NOT "%~1"=="testonly" ( diff --git a/setup.py b/setup.py index c6f7bad..a4bc500 100644 --- a/setup.py +++ b/setup.py @@ -9,21 +9,12 @@ import setuptools.command.build_ext import wheel.bdist_wheel _this_dir = os.path.dirname(os.path.abspath(__file__)) -_clib_dir = os.path.join(_this_dir, 'python', 'opencc', 'clib') _build_dir = os.path.join(_this_dir, 'build', 'python') _cmake_file = os.path.join(_this_dir, 'CMakeLists.txt') _author_file = os.path.join(_this_dir, 'AUTHORS') _readme_file = os.path.join(_this_dir, 'README.md') -try: - sys.path.insert(0, os.path.join(_this_dir, 'python')) - - import opencc # noqa - _libopencc_built = True -except ImportError: - _libopencc_built = False - def get_version_info(): version_info = ['1', '0', '0'] @@ -70,20 +61,13 @@ def get_long_description(): return f.read().decode('utf-8') -def build_libopencc(): - if _libopencc_built: - return # Skip building binary file +def build_libopencc(output_path): print('building libopencc into %s' % _build_dir) is_windows = sys.platform == 'win32' # Make build directories - if is_windows: - subprocess.call('md {}'.format(_build_dir), shell=True) - subprocess.call('md {}'.format(_clib_dir), shell=True) - else: - subprocess.call('mkdir -p {}'.format(_build_dir), shell=True) - subprocess.call('mkdir -p {}'.format(_clib_dir), shell=True) + os.makedirs(_build_dir, exist_ok=True) # Configure cmake_args = [ @@ -93,14 +77,14 @@ def build_libopencc(): '-DENABLE_BENCHMARK:BOOL=OFF', '-DBUILD_PYTHON:BOOL=ON', '-DCMAKE_BUILD_TYPE=Release', - '-DCMAKE_INSTALL_PREFIX={}'.format(_clib_dir), - '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}'.format(_clib_dir), + '-DCMAKE_INSTALL_PREFIX={}'.format(output_path), + '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}'.format(output_path), '-DPYTHON_EXECUTABLE={}'.format(sys.executable), ] if is_windows: cmake_args += \ - ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE={}'.format(_clib_dir)] + ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE={}'.format(output_path)] if sys.maxsize > 2**32: cmake_args += ['-A', 'x64'] @@ -117,11 +101,6 @@ def build_libopencc(): errno = subprocess.call(cmd) assert errno == 0, 'Build failed' - # Empty __init__.py file has to be created - # to make opencc.clib a module - with open('{}/__init__.py'.format(_clib_dir), 'w'): - pass - class OpenCCExtension(setuptools.Extension, object): def __init__(self, name, sourcedir=''): @@ -131,8 +110,12 @@ class OpenCCExtension(setuptools.Extension, object): class BuildExtCommand(setuptools.command.build_ext.build_ext, object): def build_extension(self, ext): + if self.inplace: + output_path = os.path.join(_this_dir, 'python', 'opencc', 'clib') + else: + output_path = os.path.abspath(os.path.join(self.build_lib, 'opencc', 'clib')) if isinstance(ext, OpenCCExtension): - build_libopencc() + build_libopencc(output_path) else: super(BuildExtCommand, self).build_extension(ext) @@ -157,10 +140,10 @@ class BDistWheelCommand(wheel.bdist_wheel.bdist_wheel, object): return 'macosx-11.0-{}'.format(machine) else: raise NotImplementedError - + if os.name == 'posix': _, _, _, _, machine = os.uname() - return 'manylinux1-{}'.format(machine) + return 'manylinux2014-{}'.format(machine) warnings.warn( 'Windows macos and linux are all not detected, ' @@ -190,10 +173,6 @@ setuptools.setup( packages=packages, package_dir={'opencc': 'python/opencc'}, - package_data={str('opencc'): [ - 'clib/opencc_clib*', - 'clib/share/opencc/*', - ]}, ext_modules=[OpenCCExtension('opencc.clib.opencc_clib', 'python')], cmdclass={ 'build_ext': BuildExtCommand, diff --git a/src/BUILD.bazel b/src/BUILD.bazel new file mode 100644 index 0000000..550d3b8 --- /dev/null +++ b/src/BUILD.bazel @@ -0,0 +1,477 @@ +load("@rules_cc//cc:defs.bzl", "cc_library") + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "opencc", + deps = [ + ":binary_dict", + ":common", + ":config", + ":conversion", + ":conversion_chain", + ":converter", + ":darts_dict", + ":dict", + ":dict_converter", + ":dict_entry", + ":dict_group", + ":exception", + ":lexicon", + ":marisa_dict", + ":max_match_segmentation", + ":phrase_extract", + ":segmentation", + ":segments", + ":serializable_dict", + ":serialized_values", + ":simple_converter", + ":utf8_string_slice", + ":utf8_util", + ], +) + +cc_library( + name = "binary_dict", + srcs = ["BinaryDict.cpp"], + hdrs = ["BinaryDict.hpp"], + deps = [ + ":common", + ":lexicon", + ":serializable_dict", + ], +) + +cc_test( + name = "binary_dict_test", + srcs = ["BinaryDictTest.cpp"], + deps = [ + ":binary_dict", + ":text_dict_test_base", + "@googletest//:gtest_main", + ], +) + +cc_library( + name = "cmd_line_output", + hdrs = ["CmdLineOutput.hpp"], + visibility = ["//src/tools:__pkg__"], + deps = [ + "@tclap", + ], +) + +cc_library( + name = "common", + hdrs = [ + "Common.hpp", + "Export.hpp", + "Optional.hpp", + "opencc_config.h", + ], + defines = ["Opencc_BUILT_AS_STATIC"], +) + +cc_library( + name = "config", + srcs = ["Config.cpp"], + hdrs = ["Config.hpp"], + deps = [ + ":common", + ":conversion_chain", + ":converter", + ":darts_dict", + ":dict_group", + ":marisa_dict", + ":max_match_segmentation", + ":text_dict", + "@rapidjson", + ], +) + +cc_test( + name = "config_test", + srcs = ["ConfigTest.cpp"], + deps = [ + ":common", + ":config", + ":config_test_base", + ":converter", + ":test_utils_utf8", + "@googletest//:gtest_main", + ], +) + +cc_library( + name = "config_test_base", + testonly = True, + hdrs = ["ConfigTestBase.hpp"], + data = ["//test/config_test"], + defines = ["BAZEL"], + deps = [ + ":test_utils", + "@bazel_tools//tools/cpp/runfiles", + ], +) + +cc_library( + name = "conversion", + srcs = ["Conversion.cpp"], + hdrs = ["Conversion.hpp"], + deps = [ + ":common", + ":dict", + ":segmentation", + ], +) + +cc_test( + name = "conversion_test", + srcs = ["ConversionTest.cpp"], + deps = [ + ":conversion", + ":dict_group_test_base", + "@googletest//:gtest_main", + ], +) + +cc_library( + name = "conversion_chain", + srcs = ["ConversionChain.cpp"], + hdrs = ["ConversionChain.hpp"], + deps = [ + ":common", + ":conversion", + ":segments", + ], +) + +cc_test( + name = "conversion_chain_test", + srcs = ["ConversionChainTest.cpp"], + deps = [ + ":conversion_chain", + ":dict_group_test_base", + "@googletest//:gtest_main", + ], +) + +cc_library( + name = "converter", + srcs = ["Converter.cpp"], + hdrs = ["Converter.hpp"], + deps = [ + ":common", + ":conversion_chain", + ":segmentation", + ":segments", + ], +) + +cc_library( + name = "darts_dict", + srcs = ["DartsDict.cpp"], + hdrs = ["DartsDict.hpp"], + deps = [ + ":binary_dict", + ":common", + ":lexicon", + ":serializable_dict", + "@darts-clone", + ], +) + +cc_test( + name = "darts_dict_test", + srcs = ["DartsDictTest.cpp"], + deps = [ + ":darts_dict", + ":text_dict_test_base", + "@googletest//:gtest_main", + ], +) + +cc_library( + name = "dict", + srcs = ["Dict.cpp"], + hdrs = ["Dict.hpp"], + deps = [ + ":common", + ":dict_entry", + ], +) + +cc_library( + name = "dict_converter", + srcs = ["DictConverter.cpp"], + hdrs = ["DictConverter.hpp"], + deps = [ + ":common", + ":darts_dict", + ":marisa_dict", + ":text_dict", + ], +) + +cc_library( + name = "dict_entry", + srcs = ["DictEntry.cpp"], + hdrs = ["DictEntry.hpp"], + deps = [ + ":common", + ":segments", + ":utf8_util", + ], +) + +cc_library( + name = "dict_group", + srcs = ["DictGroup.cpp"], + hdrs = ["DictGroup.hpp"], + deps = [ + ":common", + ":dict", + ":lexicon", + ":text_dict", + ], +) + +cc_test( + name = "dict_group_test", + srcs = ["DictGroupTest.cpp"], + deps = [ + ":dict_group_test_base", + "@googletest//:gtest_main", + ], +) + +cc_library( + name = "dict_group_test_base", + testonly = True, + hdrs = ["DictGroupTestBase.hpp"], + deps = [ + ":dict_group", + ":text_dict_test_base", + ], +) + +cc_library( + name = "exception", + hdrs = [ + "Exception.hpp", + ], + deps = [":common"], +) + +cc_library( + name = "lexicon", + srcs = ["Lexicon.cpp"], + hdrs = ["Lexicon.hpp"], + deps = [ + ":common", + ":dict_entry", + ], +) + +cc_library( + name = "marisa_dict", + srcs = ["MarisaDict.cpp"], + hdrs = ["MarisaDict.hpp"], + deps = [ + ":common", + ":lexicon", + ":serialized_values", + "@marisa-trie", + ], +) + +cc_test( + name = "marisa_dict_test", + srcs = ["MarisaDictTest.cpp"], + deps = [ + ":marisa_dict", + ":text_dict_test_base", + "@googletest//:gtest_main", + ], +) + +cc_library( + name = "max_match_segmentation", + srcs = ["MaxMatchSegmentation.cpp"], + hdrs = ["MaxMatchSegmentation.hpp"], + deps = [ + ":common", + ":dict_group", + ":segmentation", + ], +) + +cc_test( + name = "max_match_segmentation_test", + srcs = ["MaxMatchSegmentationTest.cpp"], + deps = [ + ":dict_group_test_base", + ":max_match_segmentation", + "@googletest//:gtest_main", + ], +) + +cc_library( + name = "phrase_extract", + srcs = ["PhraseExtract.cpp"], + hdrs = ["PhraseExtract.hpp"], + visibility = ["//src/tools:__pkg__"], + deps = [ + ":common", + ":marisa_dict", + ":utf8_string_slice", + ], +) + +cc_library( + name = "segmentation", + srcs = ["Segmentation.cpp"], + hdrs = ["Segmentation.hpp"], + deps = [":common"], +) + +cc_library( + name = "segments", + hdrs = ["Segments.hpp"], + deps = [":common"], +) + +cc_library( + name = "serializable_dict", + hdrs = ["SerializableDict.hpp"], + deps = [ + ":dict", + ], +) + +cc_library( + name = "serialized_values", + srcs = ["SerializedValues.cpp"], + hdrs = ["SerializedValues.hpp"], + deps = [ + ":common", + ":lexicon", + ":serializable_dict", + ], +) + +cc_test( + name = "serialized_values_test", + srcs = ["SerializedValuesTest.cpp"], + deps = [ + ":serialized_values", + ":text_dict_test_base", + "@googletest//:gtest_main", + ], +) + +cc_library( + name = "simple_converter", + srcs = ["SimpleConverter.cpp"], + hdrs = [ + "SimpleConverter.hpp", + "opencc.h", + ], + defines = ["BAZEL"], + deps = [ + ":common", + ":config", + ":converter", + ":utf8_util", + "@bazel_tools//tools/cpp/runfiles", + ], +) + +cc_test( + name = "simple_converter_test", + srcs = ["SimpleConverterTest.cpp"], + deps = [ + ":config_test_base", + ":simple_converter", + ":test_utils_utf8", + "@googletest//:gtest_main", + ], +) + +cc_library( + name = "test_utils", + testonly = True, + hdrs = ["TestUtils.hpp"], + deps = [ + "@googletest//:gtest", + ], +) + +cc_library( + name = "test_utils_utf8", + testonly = True, + srcs = ["TestUtilsUTF8.hpp"], +) + +cc_library( + name = "text_dict", + srcs = ["TextDict.cpp"], + hdrs = ["TextDict.hpp"], + deps = [ + ":common", + ":lexicon", + ":serializable_dict", + ], +) + +cc_test( + name = "text_dict_test", + srcs = ["TextDictTest.cpp"], + deps = [ + ":text_dict", + ":text_dict_test_base", + "@googletest//:gtest_main", + ], +) + +cc_library( + name = "text_dict_test_base", + testonly = True, + srcs = ["TextDictTestBase.hpp"], + deps = [ + ":lexicon", + ":test_utils", + ":test_utils_utf8", + ":text_dict", + ], +) + +cc_library( + name = "utf8_string_slice", + srcs = ["UTF8StringSlice.cpp"], + hdrs = ["UTF8StringSlice.hpp"], + deps = [ + ":common", + ":utf8_util", + ], +) + +cc_library( + name = "utf8_util", + srcs = ["UTF8Util.cpp"], + hdrs = ["UTF8Util.hpp"], + deps = [ + ":common", + ":exception", + ], +) + +cc_test( + name = "utf8_util_test", + srcs = ["UTF8UtilTest.cpp"], + deps = [ + ":test_utils", + ":utf8_util", + "@googletest//:gtest_main", + ], +) diff --git a/src/BinaryDict.cpp b/src/BinaryDict.cpp index c577566..3a9398b 100644 --- a/src/BinaryDict.cpp +++ b/src/BinaryDict.cpp @@ -67,11 +67,12 @@ void BinaryDict::SerializeToFile(FILE* fp) const { } BinaryDictPtr BinaryDict::NewFromFile(FILE* fp) { - size_t offsetBound, savedOffset; - savedOffset = ftell(fp); + long savedOffset = ftell(fp); fseek(fp, 0L, SEEK_END); - offsetBound = ftell(fp) - savedOffset; + long offsetBoundLong = ftell(fp) - savedOffset; fseek(fp, savedOffset, SEEK_SET); + assert(offsetBoundLong >= 0); + size_t offsetBound = static_cast(offsetBoundLong); BinaryDictPtr dict(new BinaryDict(LexiconPtr(new Lexicon))); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 75eda02..7768c89 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -10,7 +10,7 @@ if(NOT USE_SYSTEM_RAPIDJSON) include_directories(../deps/rapidjson-1.1.0) endif() if(NOT USE_SYSTEM_TCLAP) - include_directories(../deps/tclap-1.2.2) + include_directories(../deps/tclap-1.2.5) endif() # Library @@ -85,7 +85,7 @@ set(UNITTESTS if (ENABLE_DARTS) set(OPENCC_ENABLE_DARTS 1) if(NOT USE_SYSTEM_DARTS) - include_directories(../deps/darts-clone) + include_directories(../deps/darts-clone-0.32) endif() set( LIBOPENCC_HEADERS diff --git a/src/Config.cpp b/src/Config.cpp index a3a36b6..eee6e06 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -43,11 +43,7 @@ namespace { class ConfigInternal { public: - std::string configDirectory; - std::unordered_map< - std::string, - std::unordered_map>> - dictCache; + std::vector paths; const JSONValue& GetProperty(const JSONValue& doc, const char* name) { if (!doc.HasMember(name)) { @@ -88,16 +84,11 @@ public: if (SerializableDict::TryLoadFromFile(fileName, &dict)) { return dict; } - // Configuration directory - if ((configDirectory != "") && SerializableDict::TryLoadFromFile( - configDirectory + fileName, &dict)) { - return dict; - } - // Package data directory - if ((PACKAGE_DATA_DIRECTORY != "") && - SerializableDict::TryLoadFromFile( - PACKAGE_DATA_DIRECTORY + fileName, &dict)) { - return dict; + for (const std::string& dirPath : paths) { + std::string path = dirPath + '/' + fileName; + if (SerializableDict::TryLoadFromFile(path, &dict)) { + return dict; + } } throw FileNotFound(fileName); } @@ -138,15 +129,7 @@ public: return DictGroupPtr(new DictGroup(dicts)); } else { std::string fileName = GetStringProperty(doc, "file"); - // Read from cache - DictPtr& cache = dictCache[type][configDirectory][fileName]; - if (cache != nullptr) { - return cache; - } DictPtr dict = LoadDictFromFile(type, fileName); - - // Update Cache - cache = dict; return dict; } } @@ -209,17 +192,50 @@ public: return prefixedFileName; } } + + for (const std::string& dirPath : paths) { + std::string path = dirPath + '/' + fileName; + ifs.open(UTF8Util::GetPlatformString(path).c_str()); + if (ifs.is_open()) { + return path; + } + } + throw FileNotFound(fileName); } }; + +std::string GetParentDirectory(const std::string& path) { + size_t pos = path.rfind('/', path.length() - 1); + if (pos == std::string::npos) { + pos = path.rfind('\\', path.length() - 1); + } + if (pos == std::string::npos) { + return ""; + } + return path.substr(0, pos + 1); +} + } // namespace Config::Config() : internal(new ConfigInternal()) {} -Config::~Config() { delete (ConfigInternal*)internal; } - -ConverterPtr Config::NewFromFile(const std::string& fileName) { - ConfigInternal* impl = (ConfigInternal*)internal; +Config::~Config() { delete reinterpret_cast(internal); } + +ConverterPtr Config::NewFromFile(const std::string& fileName, + const std::vector& paths, + const char* argv0) { + ConfigInternal* impl = reinterpret_cast(internal); + impl->paths = paths; + if (argv0 != nullptr) { + std::string parent = GetParentDirectory(argv0); + if (!parent.empty()) { + impl->paths.push_back(parent); + } + } + if (PACKAGE_DATA_DIRECTORY != "") { + impl->paths.push_back(PACKAGE_DATA_DIRECTORY); + } std::string prefixedFileName = impl->FindConfigFile(fileName); std::ifstream ifs(UTF8Util::GetPlatformString(prefixedFileName)); std::string content(std::istreambuf_iterator(ifs), @@ -233,11 +249,27 @@ ConverterPtr Config::NewFromFile(const std::string& fileName) { if (slashPos != std::string::npos) { configDirectory = prefixedFileName.substr(0, slashPos) + "/"; } - return NewFromString(content, configDirectory); + if (!configDirectory.empty()) { + impl->paths.push_back(configDirectory); + } + return NewFromString(content, impl->paths); } ConverterPtr Config::NewFromString(const std::string& json, const std::string& configDirectory) { + std::vector paths; + if (!configDirectory.empty()) { + if (configDirectory.back() == '/' || configDirectory.back() == '\\') { + paths.push_back(configDirectory); + } else { + paths.push_back(configDirectory + '/'); + } + } + return NewFromString(json, paths); +} + +ConverterPtr Config::NewFromString(const std::string& json, + const std::vector& paths) { rapidjson::Document doc; doc.ParseInsitu<0>(const_cast(json.c_str())); @@ -254,15 +286,8 @@ ConverterPtr Config::NewFromString(const std::string& json, name = doc["name"].GetString(); } - ConfigInternal* impl = (ConfigInternal*)internal; - if (!configDirectory.empty()) { - if (configDirectory.back() == '/' || configDirectory.back() == '\\') - impl->configDirectory = configDirectory; - else - impl->configDirectory = configDirectory + '/'; - } else { - impl->configDirectory.clear(); - } + ConfigInternal* impl = reinterpret_cast(internal); + impl->paths = paths; // Required: segmentation SegmentationPtr segmentation = diff --git a/src/Config.hpp b/src/Config.hpp index 7a904ce..1737d01 100644 --- a/src/Config.hpp +++ b/src/Config.hpp @@ -34,7 +34,12 @@ public: ConverterPtr NewFromString(const std::string& json, const std::string& configDirectory); - ConverterPtr NewFromFile(const std::string& fileName); + ConverterPtr NewFromString(const std::string& json, + const std::vector& paths); + + ConverterPtr NewFromFile(const std::string& fileName, + const std::vector& paths = {}, + const char* argv0 = nullptr); private: void* internal; diff --git a/src/ConfigTest.cpp b/src/ConfigTest.cpp index 369c9dd..50bf857 100644 --- a/src/ConfigTest.cpp +++ b/src/ConfigTest.cpp @@ -32,7 +32,9 @@ protected: : input(utf8("燕燕于飞差池其羽之子于归远送于野")), expected(utf8("燕燕于飛差池其羽之子于歸遠送於野")) {} - virtual void SetUp() { converter = config.NewFromFile(CONFIG_TEST_PATH); } + virtual void SetUp() { + converter = config.NewFromFile(CONFIG_TEST_JSON_PATH); + } Config config; ConverterPtr converter; @@ -62,13 +64,11 @@ TEST_F(ConfigTest, NonexistingPath) { } TEST_F(ConfigTest, NewFromStringWitoutTrailingSlash) { - std::ifstream ifs(CONFIG_TEST_PATH); + std::ifstream ifs(CONFIG_TEST_JSON_PATH); std::string content(std::istreambuf_iterator(ifs), (std::istreambuf_iterator())); - std::string pathWithoutTrailingSlash = CMAKE_SOURCE_DIR "/test/config_test"; - const ConverterPtr _ = - config.NewFromString(content, pathWithoutTrailingSlash); + const ConverterPtr _ = config.NewFromString(content, CONFIG_TEST_DIR_PATH); } } // namespace opencc diff --git a/src/ConfigTestBase.hpp b/src/ConfigTestBase.hpp index fc8bb74..b3aa5da 100644 --- a/src/ConfigTestBase.hpp +++ b/src/ConfigTestBase.hpp @@ -18,17 +18,41 @@ #pragma once +#ifdef BAZEL +#include "tools/cpp/runfiles/runfiles.h" +#endif + #include "TestUtils.hpp" namespace opencc { +#ifdef CMAKE_SOURCE_DIR +class ConfigTestBase : public ::testing::Test { +protected: + ConfigTestBase() + : CONFIG_TEST_JSON_PATH(CMAKE_SOURCE_DIR + "/test/config_test/config_test.json"), + CONFIG_TEST_DIR_PATH(CMAKE_SOURCE_DIR "/test/config_test") {} + + const std::string CONFIG_TEST_JSON_PATH; + const std::string CONFIG_TEST_DIR_PATH; +}; +#endif + +#ifdef BAZEL +using bazel::tools::cpp::runfiles::Runfiles; class ConfigTestBase : public ::testing::Test { protected: ConfigTestBase() - : CONFIG_TEST_PATH(CMAKE_SOURCE_DIR - "/test/config_test/config_test.json") {} + : runfiles_(Runfiles::CreateForTest()), + CONFIG_TEST_JSON_PATH( + runfiles_->Rlocation("_main/test/config_test/config_test.json")), + CONFIG_TEST_DIR_PATH(runfiles_->Rlocation("_main/test/config_test")) {} - const std::string CONFIG_TEST_PATH; + const std::unique_ptr runfiles_; + const std::string CONFIG_TEST_JSON_PATH; + const std::string CONFIG_TEST_DIR_PATH; }; +#endif } // namespace opencc diff --git a/src/Lexicon.cpp b/src/Lexicon.cpp index f5e5140..cfb215c 100644 --- a/src/Lexicon.cpp +++ b/src/Lexicon.cpp @@ -19,8 +19,42 @@ #include #include "Lexicon.hpp" + namespace opencc { +namespace { + +DictEntry* ParseKeyValues(const char* buff, size_t lineNum) { + size_t length; + if (buff == nullptr || UTF8Util::IsLineEndingOrFileEnding(*buff)) { + return nullptr; + } + const char* pbuff = UTF8Util::FindNextInline(buff, '\t'); + if (UTF8Util::IsLineEndingOrFileEnding(*pbuff)) { + throw InvalidTextDictionary("Tabular not found " + std::string(buff), + lineNum); + } + length = static_cast(pbuff - buff); + std::string key = UTF8Util::FromSubstr(buff, length); + std::vector values; + while (!UTF8Util::IsLineEndingOrFileEnding(*pbuff)) { + buff = pbuff = UTF8Util::NextChar(pbuff); + pbuff = UTF8Util::FindNextInline(buff, ' '); + length = static_cast(pbuff - buff); + const std::string& value = UTF8Util::FromSubstr(buff, length); + values.push_back(value); + } + if (values.size() == 0) { + throw InvalidTextDictionary("No value in an item", lineNum); + } else if (values.size() == 1) { + return DictEntryFactory::New(key, values.at(0)); + } else { + return DictEntryFactory::New(key, values); + } +} + +} // namespace + void Lexicon::Sort() { std::sort(entries.begin(), entries.end(), DictEntry::UPtrLessThan); } @@ -42,4 +76,20 @@ bool Lexicon::IsUnique(std::string* dupkey) { return true; } +LexiconPtr Lexicon::ParseLexiconFromFile(FILE* fp) { + const int ENTRY_BUFF_SIZE = 4096; + char buff[ENTRY_BUFF_SIZE]; + LexiconPtr lexicon(new Lexicon); + UTF8Util::SkipUtf8Bom(fp); + size_t lineNum = 1; + while (fgets(buff, ENTRY_BUFF_SIZE, fp)) { + DictEntry* entry = ParseKeyValues(buff, lineNum); + if (entry != nullptr) { + lexicon->Add(entry); + } + lineNum++; + } + return lexicon; +} + } // namespace opencc diff --git a/src/Lexicon.hpp b/src/Lexicon.hpp index 6889128..61dcc59 100644 --- a/src/Lexicon.hpp +++ b/src/Lexicon.hpp @@ -62,6 +62,8 @@ public: return entries.end(); } + static LexiconPtr ParseLexiconFromFile(FILE* fp); + private: std::vector> entries; }; diff --git a/src/PhraseExtract.cpp b/src/PhraseExtract.cpp index a9a6246..0de70c9 100644 --- a/src/PhraseExtract.cpp +++ b/src/PhraseExtract.cpp @@ -57,7 +57,7 @@ public: marisa::Agent agent; agent.set_query(key.CString(), key.ByteLength()); if (marisa_trie.lookup(agent)) { - int item_id = marisa_id_item_map[agent.key().id()]; + size_t item_id = marisa_id_item_map[agent.key().id()]; return items[item_id].second; } @@ -99,7 +99,7 @@ private: } void BuildTrie() { - std::unordered_map key_item_id_map; + std::unordered_map key_item_id_map; marisa::Keyset keyset; for (size_t i = 0; i < items.size(); i++) { const auto& key = items[i].first; @@ -117,7 +117,7 @@ private: if (it == key_item_id_map.end()) { throw ShouldNotBeHere(); } - int item_id = it->second; + size_t item_id = it->second; marisa_id_item_map[marisa_id] = item_id; } } @@ -127,7 +127,7 @@ private: dict; std::vector items; marisa::Trie marisa_trie; - std::vector marisa_id_item_map; + std::vector marisa_id_item_map; }; using namespace internal; diff --git a/src/PhraseExtractTest.cpp b/src/PhraseExtractTest.cpp index decefac..7312424 100644 --- a/src/PhraseExtractTest.cpp +++ b/src/PhraseExtractTest.cpp @@ -158,8 +158,8 @@ TEST_F(PhraseExtractTest, SelectWords) { phraseExtract.SetWordMaxLength(3); phraseExtract.SetFullText(siShi); phraseExtract.SetPostCalculationFilter( - [](const PhraseExtract& phraseExtract, const UTF8StringSlice8Bit& word) { - return phraseExtract.Frequency(word) == 1; + [](const PhraseExtract& p, const UTF8StringSlice8Bit& word) { + return p.Frequency(word) == 1; }); phraseExtract.SelectWords(); EXPECT_EQ(std::vector({"十", "四", "是", "四十", "十四", diff --git a/src/Segments.hpp b/src/Segments.hpp index 21c3981..2186dfd 100644 --- a/src/Segments.hpp +++ b/src/Segments.hpp @@ -32,7 +32,7 @@ public: Segments() {} Segments(std::initializer_list initList) { - for (const std::string& item : initList) { + for (const char* item : initList) { AddSegment(item); } } diff --git a/src/SimpleConverter.cpp b/src/SimpleConverter.cpp index 417f7a5..7b24ac6 100644 --- a/src/SimpleConverter.cpp +++ b/src/SimpleConverter.cpp @@ -16,33 +16,69 @@ * limitations under the License. */ -#ifdef _MSC_VER -#define NOMINMAX -#include -#undef NOMINMAX -#endif // _MSC_VER - #include "Config.hpp" #include "Converter.hpp" #include "UTF8Util.hpp" #include "opencc.h" +#ifdef BAZEL +#include "tools/cpp/runfiles/runfiles.h" +using bazel::tools::cpp::runfiles::Runfiles; +#endif + using namespace opencc; +namespace { + struct InternalData { const ConverterPtr converter; InternalData(const ConverterPtr& _converter) : converter(_converter) {} -}; -SimpleConverter::SimpleConverter(const std::string& configFileName) { - try { - Config config; - internalData = new InternalData(config.NewFromFile(configFileName)); - } catch (Exception& ex) { - throw std::runtime_error(ex.what()); + static InternalData* NewInternalData(const std::string& configFileName, + const std::vector& paths, + const char* argv0) { + try { + Config config; +#ifdef BAZEL + std::string err; + std::unique_ptr bazel_runfiles( + Runfiles::Create(argv0 != nullptr ? argv0 : "", &err)); + if (bazel_runfiles != nullptr) { + std::vector paths_with_runfiles = paths; + paths_with_runfiles.push_back( + bazel_runfiles->Rlocation("opencc~/data/config")); + paths_with_runfiles.push_back( + bazel_runfiles->Rlocation("opencc~/data/dictionary")); + paths_with_runfiles.push_back( + bazel_runfiles->Rlocation("_main/data/config")); + paths_with_runfiles.push_back( + bazel_runfiles->Rlocation("_main/data/dictionary")); + return new InternalData( + config.NewFromFile(configFileName, paths_with_runfiles)); + } +#endif + return new InternalData(config.NewFromFile(configFileName, paths, argv0)); + } catch (Exception& ex) { + throw std::runtime_error(ex.what()); + } } -} +}; + +} // namespace + +SimpleConverter::SimpleConverter(const std::string& configFileName) + : SimpleConverter(configFileName, std::vector()) {} + +SimpleConverter::SimpleConverter(const std::string& configFileName, + const std::vector& paths) + : SimpleConverter(configFileName, paths, nullptr) {} + +SimpleConverter::SimpleConverter(const std::string& configFileName, + const std::vector& paths, + const char* argv0) + : internalData( + InternalData::NewInternalData(configFileName, paths, argv0)) {} SimpleConverter::~SimpleConverter() { delete (InternalData*)internalData; } diff --git a/src/SimpleConverter.hpp b/src/SimpleConverter.hpp index 56932b7..c7cda2b 100644 --- a/src/SimpleConverter.hpp +++ b/src/SimpleConverter.hpp @@ -18,6 +18,7 @@ #include "Export.hpp" #include +#include #ifndef __OPENCC_SIMPLECONVERTER_HPP_ #define __OPENCC_SIMPLECONVERTER_HPP_ @@ -29,6 +30,7 @@ */ namespace opencc { + /** * A high level converter * This interface does not require C++11 to compile. @@ -40,7 +42,25 @@ public: * Constructor of SimpleConverter * @param configFileName File name of configuration. */ - SimpleConverter(const std::string& configFileName); + explicit SimpleConverter(const std::string& configFileName); + + /** + * Constructor of SimpleConverter + * @param configFileName File name of configuration. + * @param paths Additional paths to locate configuration and dictionary files. + */ + SimpleConverter(const std::string& configFileName, + const std::vector& paths); + + /** + * Constructor of SimpleConverter + * @param configFileName File name of configuration. + * @param paths Additional paths to locate configuration and dictionary files. + * @param argv0 Path of the executable (argv[0]), in addition to additional + * paths. + */ + SimpleConverter(const std::string& configFileName, + const std::vector& paths, const char* argv0); ~SimpleConverter(); diff --git a/src/SimpleConverterTest.cpp b/src/SimpleConverterTest.cpp index 9c645b7..199ba5e 100644 --- a/src/SimpleConverterTest.cpp +++ b/src/SimpleConverterTest.cpp @@ -37,15 +37,15 @@ protected: } }; -TEST_F(SimpleConverterTest, Convert) { TestConverter(CONFIG_TEST_PATH); } +TEST_F(SimpleConverterTest, Convert) { TestConverter(CONFIG_TEST_JSON_PATH); } TEST_F(SimpleConverterTest, Multithreading) { const auto& routine = [this](const std::string& config) { TestConverter(config); }; - std::thread thread1(routine, CONFIG_TEST_PATH); - std::thread thread2(routine, CONFIG_TEST_PATH); - routine(CONFIG_TEST_PATH); + std::thread thread1(routine, CONFIG_TEST_JSON_PATH); + std::thread thread2(routine, CONFIG_TEST_JSON_PATH); + routine(CONFIG_TEST_JSON_PATH); thread1.join(); thread2.join(); } @@ -54,7 +54,7 @@ TEST_F(SimpleConverterTest, CInterface) { const std::string& text = utf8("燕燕于飞差池其羽之子于归远送于野"); const std::string& expected = utf8("燕燕于飛差池其羽之子于歸遠送於野"); { - opencc_t od = opencc_open(CONFIG_TEST_PATH.c_str()); + opencc_t od = opencc_open(CONFIG_TEST_JSON_PATH.c_str()); char* converted = opencc_convert_utf8(od, text.c_str(), (size_t)-1); EXPECT_EQ(expected, converted); opencc_convert_utf8_free(converted); @@ -62,7 +62,7 @@ TEST_F(SimpleConverterTest, CInterface) { } { char output[1024]; - opencc_t od = opencc_open(CONFIG_TEST_PATH.c_str()); + opencc_t od = opencc_open(CONFIG_TEST_JSON_PATH.c_str()); size_t length = opencc_convert_utf8_to_buffer(od, text.c_str(), (size_t)-1, output); EXPECT_EQ(expected.length(), length); diff --git a/src/TextDict.cpp b/src/TextDict.cpp index c81b311..34d024e 100644 --- a/src/TextDict.cpp +++ b/src/TextDict.cpp @@ -33,51 +33,6 @@ static size_t GetKeyMaxLength(const LexiconPtr& lexicon) { return maxLength; } -static DictEntry* ParseKeyValues(const char* buff, size_t lineNum) { - size_t length; - if (buff == nullptr || UTF8Util::IsLineEndingOrFileEnding(*buff)) { - return nullptr; - } - const char* pbuff = UTF8Util::FindNextInline(buff, '\t'); - if (UTF8Util::IsLineEndingOrFileEnding(*pbuff)) { - throw InvalidTextDictionary("Tabular not found " + std::string(buff), - lineNum); - } - length = static_cast(pbuff - buff); - std::string key = UTF8Util::FromSubstr(buff, length); - std::vector values; - while (!UTF8Util::IsLineEndingOrFileEnding(*pbuff)) { - buff = pbuff = UTF8Util::NextChar(pbuff); - pbuff = UTF8Util::FindNextInline(buff, ' '); - length = static_cast(pbuff - buff); - const std::string& value = UTF8Util::FromSubstr(buff, length); - values.push_back(value); - } - if (values.size() == 0) { - throw InvalidTextDictionary("No value in an item", lineNum); - } else if (values.size() == 1) { - return DictEntryFactory::New(key, values.at(0)); - } else { - return DictEntryFactory::New(key, values); - } -} - -static LexiconPtr ParseLexiconFromFile(FILE* fp) { - const int ENTRY_BUFF_SIZE = 4096; - char buff[ENTRY_BUFF_SIZE]; - LexiconPtr lexicon(new Lexicon); - UTF8Util::SkipUtf8Bom(fp); - size_t lineNum = 1; - while (fgets(buff, ENTRY_BUFF_SIZE, fp)) { - DictEntry* entry = ParseKeyValues(buff, lineNum); - if (entry != nullptr) { - lexicon->Add(entry); - } - lineNum++; - } - return lexicon; -} - TextDict::TextDict(const LexiconPtr& _lexicon) : maxLength(GetKeyMaxLength(_lexicon)), lexicon(_lexicon) { assert(lexicon->IsSorted()); @@ -87,12 +42,12 @@ TextDict::TextDict(const LexiconPtr& _lexicon) TextDict::~TextDict() {} TextDictPtr TextDict::NewFromSortedFile(FILE* fp) { - const LexiconPtr& lexicon = ParseLexiconFromFile(fp); + const LexiconPtr& lexicon = Lexicon::ParseLexiconFromFile(fp); return TextDictPtr(new TextDict(lexicon)); } TextDictPtr TextDict::NewFromFile(FILE* fp) { - const LexiconPtr& lexicon = ParseLexiconFromFile(fp); + const LexiconPtr& lexicon = Lexicon::ParseLexiconFromFile(fp); lexicon->Sort(); std::string dupkey; if (!lexicon->IsUnique(&dupkey)) { diff --git a/src/UTF8Util.hpp b/src/UTF8Util.hpp index 9d3e39b..6babd36 100644 --- a/src/UTF8Util.hpp +++ b/src/UTF8Util.hpp @@ -19,9 +19,10 @@ #pragma once #ifdef _MSC_VER +#ifndef NOMINMAX #define NOMINMAX +#endif #include -#undef NOMINMAX #endif // _MSC_VER #include diff --git a/src/opencc_config.h b/src/opencc_config.h new file mode 100644 index 0000000..83d0465 --- /dev/null +++ b/src/opencc_config.h @@ -0,0 +1,3 @@ +#pragma once + +#define OPENCC_ENABLE_DARTS diff --git a/src/tools/BUILD.bazel b/src/tools/BUILD.bazel new file mode 100644 index 0000000..ac4e691 --- /dev/null +++ b/src/tools/BUILD.bazel @@ -0,0 +1,30 @@ +package(default_visibility = ["//visibility:public"]) + +cc_binary( + name = "command_line", + srcs = ["CommandLine.cpp"], + deps = [ + "//src:cmd_line_output", + "//src:config", + "//src:converter", + "//src:utf8_util", + ], +) + +cc_binary( + name = "dict_converter", + srcs = ["DictConverter.cpp"], + deps = [ + "//src:cmd_line_output", + "//src:dict_converter", + ], +) + +cc_binary( + name = "phrase_extract", + srcs = ["PhraseExtract.cpp"], + deps = [ + "//src:cmd_line_output", + "//src:phrase_extract", + ], +) diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt index 373b9cc..bf619ea 100644 --- a/src/tools/CMakeLists.txt +++ b/src/tools/CMakeLists.txt @@ -1,5 +1,7 @@ # Executables +include_directories("${PROJECT_SOURCE_DIR}") + ## opencc add_executable(opencc CommandLine.cpp) target_link_libraries(opencc libopencc) diff --git a/src/tools/CommandLine.cpp b/src/tools/CommandLine.cpp index b12ab5a..e029d3d 100644 --- a/src/tools/CommandLine.cpp +++ b/src/tools/CommandLine.cpp @@ -18,10 +18,10 @@ #include -#include "CmdLineOutput.hpp" -#include "Config.hpp" -#include "Converter.hpp" -#include "UTF8Util.hpp" +#include "src/CmdLineOutput.hpp" +#include "src/Config.hpp" +#include "src/Converter.hpp" +#include "src/UTF8Util.hpp" using namespace opencc; @@ -184,6 +184,9 @@ int main(int argc, const char* argv[]) { TCLAP::ValueArg noFlushArg( "", "noflush", "Disable flush for every line", false /* required */, false /* default */, "bool" /* type */, cmd); + TCLAP::MultiArg pathArg( + "", "path", "Additional paths to locate config and dictionary files.", + false /* required */, "file" /* type */, cmd); cmd.parse(argc, argv); configFileName = configArg.getValue(); noFlush = noFlushArg.getValue(); @@ -194,7 +197,7 @@ int main(int argc, const char* argv[]) { outputFileName = Optional(outputArg.getValue()); noFlush = true; } - converter = config.NewFromFile(configFileName); + converter = config.NewFromFile(configFileName, pathArg.getValue()); bool lineByLine = inputFileName.IsNull(); if (lineByLine) { ConvertLineByLine(); diff --git a/src/tools/DictConverter.cpp b/src/tools/DictConverter.cpp index bb5ea8f..8389ede 100644 --- a/src/tools/DictConverter.cpp +++ b/src/tools/DictConverter.cpp @@ -16,9 +16,9 @@ * limitations under the License. */ -#include "DictConverter.hpp" -#include "CmdLineOutput.hpp" -#include "Exception.hpp" +#include "src/DictConverter.hpp" +#include "src/CmdLineOutput.hpp" +#include "src/Exception.hpp" using namespace opencc; diff --git a/src/tools/PhraseExtract.cpp b/src/tools/PhraseExtract.cpp index eddb18d..08eb7d7 100644 --- a/src/tools/PhraseExtract.cpp +++ b/src/tools/PhraseExtract.cpp @@ -18,8 +18,8 @@ #include -#include "CmdLineOutput.hpp" -#include "PhraseExtract.hpp" +#include "src/CmdLineOutput.hpp" +#include "src/PhraseExtract.hpp" using opencc::Exception; using opencc::PhraseExtract; diff --git a/test/BUILD.bazel b/test/BUILD.bazel new file mode 100644 index 0000000..71a118a --- /dev/null +++ b/test/BUILD.bazel @@ -0,0 +1,26 @@ +cc_test( + name = "bazel_opencc_test", + srcs = ["BazelOpenccTest.cpp"], + deps = [ + "//:opencc", + "@googletest//:gtest_main", + ], +) + +cc_test( + name = "command_line_converter_test", + srcs = ["CommandLineConvertTest.cpp"], + data = [ + "//data/config", + "//data/dictionary:binary_dictionaries", + "//data/dictionary:text_dictionaries", + "//src/tools:command_line", + "//test/testcases", + ], + defines = ["BAZEL"], + deps = [ + "//src:common", + "@bazel_tools//tools/cpp/runfiles", + "@googletest//:gtest_main", + ], +) diff --git a/test/BazelOpenccTest.cpp b/test/BazelOpenccTest.cpp new file mode 100644 index 0000000..70d13e5 --- /dev/null +++ b/test/BazelOpenccTest.cpp @@ -0,0 +1,54 @@ +/* + * Open Chinese Convert + * + * Copyright 2024-2024 Carbo Kuo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "opencc.h" +#include "gtest/gtest.h" + +namespace opencc { + +class BazelOpenccTest : public ::testing::Test {}; + +TEST_F(BazelOpenccTest, SimpleConverter_s2t) { + SimpleConverter converter(OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD); + EXPECT_EQ(converter.Convert("简化字测试"), "簡化字測試"); +} + +TEST_F(BazelOpenccTest, SimpleConverter_t2s) { + SimpleConverter converter(OPENCC_DEFAULT_CONFIG_TRAD_TO_SIMP); + EXPECT_EQ(converter.Convert("簡化字測試"), "简化字测试"); +} + +TEST_F(BazelOpenccTest, CInterface_s2t) { + std::string text = "简化字测试"; + opencc_t od = opencc_open(OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD); + char* converted = opencc_convert_utf8(od, text.c_str(), (size_t)-1); + EXPECT_STREQ("簡化字測試", converted); + opencc_convert_utf8_free(converted); + EXPECT_EQ(0, opencc_close(od)); +} + +TEST_F(BazelOpenccTest, CInterface_t2s) { + std::string text = "簡化字測試"; + opencc_t od = opencc_open(OPENCC_DEFAULT_CONFIG_TRAD_TO_SIMP); + char* converted = opencc_convert_utf8(od, text.c_str(), (size_t)-1); + EXPECT_STREQ("简化字测试", converted); + opencc_convert_utf8_free(converted); + EXPECT_EQ(0, opencc_close(od)); +} + +} // namespace opencc diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e6dc3a7..61ce033 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,3 +1,4 @@ +include_directories("${PROJECT_SOURCE_DIR}") include_directories("${PROJECT_BINARY_DIR}/src") include_directories("${PROJECT_SOURCE_DIR}/src") @@ -28,7 +29,7 @@ if (ENABLE_GTEST) endif() if(NOT USE_SYSTEM_GTEST) - include_directories(../deps/gtest-1.7.0/include) + include_directories(../deps/googletest-1.15.0/googletest/include) endif() set(UNITTESTS CommandLineConvertTest diff --git a/test/CommandLineConvertTest.cpp b/test/CommandLineConvertTest.cpp index 6bfd20a..7be8a39 100644 --- a/test/CommandLineConvertTest.cpp +++ b/test/CommandLineConvertTest.cpp @@ -1,7 +1,7 @@ /* * Open Chinese Convert * - * Copyright 2015 Carbo Kuo + * Copyright 2015-2024 Carbo Kuo * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,9 +19,14 @@ #include #include -#include "Common.hpp" +#include "src/Common.hpp" #include "gtest/gtest.h" +#ifdef BAZEL +#include "tools/cpp/runfiles/runfiles.h" +using bazel::tools::cpp::runfiles::Runfiles; +#endif + namespace opencc { class CommandLineConvertTest : public ::testing::Test { @@ -31,16 +36,20 @@ protected: virtual ~CommandLineConvertTest() { free(originalWorkingDirectory); } virtual void SetUp() { +#ifdef BAZEL + runfiles_.reset(Runfiles::CreateForTest()); +#else ASSERT_NE("", PROJECT_BINARY_DIR); ASSERT_NE("", CMAKE_SOURCE_DIR); ASSERT_EQ(0, chdir(PROJECT_BINARY_DIR "/data")); +#endif } virtual void TearDown() { ASSERT_EQ(0, chdir(originalWorkingDirectory)); } std::string GetFileContents(const std::string& fileName) const { std::ifstream fs(fileName); - EXPECT_TRUE(fs.is_open()); + EXPECT_TRUE(fs.is_open()) << fileName; const std::string content((std::istreambuf_iterator(fs)), (std::istreambuf_iterator())); fs.close(); @@ -51,7 +60,10 @@ protected: originalWorkingDirectory = getcwd(nullptr, 0); } - const char* OpenccCommand() const { + std::string OpenccCommand() const { +#ifdef BAZEL + return runfiles_->Rlocation("_main/src/tools/command_line"); +#else #ifndef _MSC_VER return PROJECT_BINARY_DIR "/src/tools/opencc"; #else @@ -60,42 +72,71 @@ protected: #else return PROJECT_BINARY_DIR "/src/tools/Debug/opencc.exe"; #endif +#endif #endif } - const char* InputDirectory() const { + std::string InputDirectory() const { +#ifdef BAZEL + return runfiles_->Rlocation("_main/test/testcases") + "/"; +#else return CMAKE_SOURCE_DIR "/test/testcases/"; +#endif } - const char* OutputDirectory() const { return PROJECT_BINARY_DIR "/test/"; } + std::string OutputDirectory() const { +#ifdef BAZEL + return ::testing::TempDir() + "/"; +#else + return PROJECT_BINARY_DIR "/test/"; +#endif + } - const char* AnswerDirectory() const { + std::string AnswerDirectory() const { +#ifdef BAZEL + return runfiles_->Rlocation("_main/test/testcases") + "/"; +#else return CMAKE_SOURCE_DIR "/test/testcases/"; +#endif } - const char* ConfigurationDirectory() const { + std::string ConfigurationDirectory() const { +#ifdef BAZEL + return ""; +#else return CMAKE_SOURCE_DIR "/data/config/"; +#endif } std::string InputFile(const char* config) const { - return std::string(InputDirectory()) + config + ".in"; + return InputDirectory() + config + ".in"; } std::string OutputFile(const char* config) const { - return std::string(OutputDirectory()) + config + ".out"; + return OutputDirectory() + config + ".out"; } std::string AnswerFile(const char* config) const { - return std::string(AnswerDirectory()) + config + ".ans"; + return AnswerDirectory() + config + ".ans"; } std::string TestCommand(const char* config, const std::string& inputFile, const std::string& outputFile) const { - return OpenccCommand() + std::string("") + " -i " + inputFile + " -o " + - outputFile + " -c " + ConfigurationDirectory() + config + ".json"; + std::string cmd = OpenccCommand() + " -i " + inputFile + " -o " + + outputFile + " -c " + ConfigurationDirectory() + config + + ".json"; +#ifdef BAZEL + cmd += " --path " + runfiles_->Rlocation("_main/data/dictionary") + "/" + + " --path " + runfiles_->Rlocation("_main/data/config") + "/"; +#endif + return cmd; } char* originalWorkingDirectory; + +#ifdef BAZEL + std::unique_ptr runfiles_; +#endif }; class ConfigurationTest : public CommandLineConvertTest, @@ -128,10 +169,12 @@ TEST_P(ConfigurationTest, InPlaceConvert) { ASSERT_EQ(answer, output); } -INSTANTIATE_TEST_SUITE_P(CommandLine, ConfigurationTest, - ::testing::Values("hk2s", "hk2t", "jp2t", "s2hk", - "s2t", "s2tw", "s2twp", "t2hk", - "t2jp", "t2s", "tw2s", "tw2sp", - "tw2t")); +INSTANTIATE_TEST_SUITE_P( + CommandLine, ConfigurationTest, + ::testing::Values("hk2s", "hk2t", "jp2t", "s2hk", "s2t", "s2tw", "s2twp", + "t2hk", "t2jp", "t2s", "tw2s", "tw2sp", "tw2t"), + [](const testing::TestParamInfo& info) { + return info.param; + }); } // namespace opencc diff --git a/test/config_test/BUILD.bazel b/test/config_test/BUILD.bazel new file mode 100644 index 0000000..b27f1cc --- /dev/null +++ b/test/config_test/BUILD.bazel @@ -0,0 +1,9 @@ +filegroup( + name = "config_test", + srcs = [ + "config_test.json", + "config_test_characters.txt", + "config_test_phrases.txt", + ], + visibility = ["//visibility:public"], +) diff --git a/test/testcases/BUILD.bazel b/test/testcases/BUILD.bazel new file mode 100644 index 0000000..4c9a63a --- /dev/null +++ b/test/testcases/BUILD.bazel @@ -0,0 +1,6 @@ +package(default_visibility = ["//visibility:public"]) + +filegroup( + name = "testcases", + srcs = glob(["*.in"]) + glob(["*.ans"]), +) diff --git a/test/testcases/s2twp.ans b/test/testcases/s2twp.ans index b076425..9abfa6f 100644 --- a/test/testcases/s2twp.ans +++ b/test/testcases/s2twp.ans @@ -1,4 +1,6 @@ 滑鼠裡面的矽二極體壞了,導致游標解析度降低。 我們在寮國的伺服器的硬碟需要使用網際網路演算法軟體解決非同步的問題。 為什麼你在床裡面睡著? -海內存知己 \ No newline at end of file +海內存知己 +摩爾線程 +字節跳動 \ No newline at end of file diff --git a/test/testcases/s2twp.in b/test/testcases/s2twp.in index cfedb29..d16681a 100644 --- a/test/testcases/s2twp.in +++ b/test/testcases/s2twp.in @@ -1,4 +1,6 @@ 鼠标里面的硅二极管坏了,导致光标分辨率降低。 我们在老挝的服务器的硬盘需要使用互联网算法软件解决异步的问题。 为什么你在床里面睡着? -海内存知己 \ No newline at end of file +海内存知己 +摩尔线程 +字节跳动 \ No newline at end of file